xref: /openbsd-src/usr.sbin/bgpd/session.c (revision 850e275390052b330d93020bf619a739a3c277ac)
1 /*	$OpenBSD: session.c,v 1.284 2008/09/11 14:49:58 henning Exp $ */
2 
3 /*
4  * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/types.h>
21 
22 #include <sys/mman.h>
23 #include <sys/socket.h>
24 #include <sys/un.h>
25 #include <net/if_types.h>
26 #include <netinet/in.h>
27 #include <netinet/in_systm.h>
28 #include <netinet/ip.h>
29 #include <netinet/tcp.h>
30 #include <arpa/inet.h>
31 
32 #include <err.h>
33 #include <errno.h>
34 #include <fcntl.h>
35 #include <limits.h>
36 #include <poll.h>
37 #include <pwd.h>
38 #include <signal.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <unistd.h>
43 
44 #include "bgpd.h"
45 #include "mrt.h"
46 #include "session.h"
47 
/*
 * Fixed slots at the front of the pollfd array built in session_main().
 * Listener sockets start at PFD_LISTENERS_START; peer sockets, mrt dump
 * descriptors and control connections are appended after them.
 */
#define PFD_PIPE_MAIN		0
#define PFD_PIPE_ROUTE		1
#define PFD_PIPE_ROUTE_CTL	2
#define PFD_SOCK_CTL		3
#define PFD_SOCK_RCTL		4
#define PFD_LISTENERS_START	5
54 
/* Prototypes for the session engine functions used within this file. */
void	session_sighdlr(int);
int	setup_listeners(u_int *);
void	init_conf(struct bgpd_config *);
void	init_peer(struct peer *);
void	start_timer_holdtime(struct peer *);
void	start_timer_keepalive(struct peer *);
void	session_close_connection(struct peer *);
void	change_state(struct peer *, enum session_state, enum session_events);
int	session_setup_socket(struct peer *);
void	session_accept(int);
int	session_connect(struct peer *);
void	session_tcp_established(struct peer *);
void	session_capa_ann_none(struct peer *);
int	session_capa_add(struct peer *, struct buf *, u_int8_t, u_int8_t,
	    u_int8_t *);
int	session_capa_add_mp(struct buf *, u_int16_t, u_int8_t);
struct bgp_msg	*session_newmsg(enum msg_type, u_int16_t);
int	session_sendmsg(struct bgp_msg *, struct peer *);
void	session_open(struct peer *);
void	session_keepalive(struct peer *);
void	session_update(u_int32_t, void *, size_t);
void	session_notification(struct peer *, u_int8_t, u_int8_t, void *,
	    ssize_t);
void	session_rrefresh(struct peer *, u_int16_t, u_int8_t);
int	session_dispatch_msg(struct pollfd *, struct peer *);
int	parse_header(struct peer *, u_char *, u_int16_t *, u_int8_t *);
int	parse_open(struct peer *);
int	parse_update(struct peer *);
int	parse_refresh(struct peer *);
int	parse_notification(struct peer *);
int	parse_capabilities(struct peer *, u_char *, u_int16_t, u_int32_t *);
void	session_dispatch_imsg(struct imsgbuf *, int, u_int *);
void	session_up(struct peer *);
void	session_down(struct peer *);
void	session_demote(struct peer *, int);

/* lookup and comparison helpers */
int			 la_cmp(struct listen_addr *, struct listen_addr *);
struct peer		*getpeerbyip(struct sockaddr *);
int			 session_match_mask(struct peer *, struct sockaddr *);
struct peer		*getpeerbyid(u_int32_t);
static struct sockaddr	*addr2sa(struct bgpd_addr *, u_int16_t);
96 
/*
 * Session engine state shared by the functions in this file.
 *
 * conf           active configuration; set from session_main()'s argument
 * nconf, npeers  not touched in the code visible here -- presumably the
 *                staging area used while a reload is pending (TODO confirm)
 * sysdep         feature flags (no_md5sig, no_pfkey) for this system
 * session_quit   raised by session_sighdlr(); ends the main loop
 * pending_reconf while non-zero the main loop skips peer init/deletion
 * csock, rcsock  control sockets returned by control_init()
 * peer_cnt       peers counted by init_peer(), decremented on removal
 * ibuf_*         imsg buffers on the pipes set up in session_main()
 * mrthead        list of mrt dumps handled by the session engine
 */
struct bgpd_config	*conf, *nconf = NULL;
struct bgpd_sysdep	 sysdep;
struct peer		*npeers;
volatile sig_atomic_t	 session_quit = 0;
int			 pending_reconf = 0;
int			 csock = -1, rcsock = -1;
u_int			 peer_cnt;
struct imsgbuf		*ibuf_rde;
struct imsgbuf		*ibuf_rde_ctl;
struct imsgbuf		*ibuf_main;

struct mrt_head		 mrthead;
109 
110 void
111 session_sighdlr(int sig)
112 {
113 	switch (sig) {
114 	case SIGINT:
115 	case SIGTERM:
116 		session_quit = 1;
117 		break;
118 	}
119 }
120 
121 int
122 setup_listeners(u_int *la_cnt)
123 {
124 	int			 ttl = 255;
125 	int			 opt;
126 	struct listen_addr	*la;
127 	u_int			 cnt = 0;
128 
129 	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
130 		la->reconf = RECONF_NONE;
131 		cnt++;
132 
133 		if (la->flags & LISTENER_LISTENING)
134 			continue;
135 
136 		if (la->fd == -1) {
137 			log_warn("cannot establish listener on %s: invalid fd",
138 			    log_sockaddr((struct sockaddr *)&la->sa));
139 			continue;
140 		}
141 
142 		opt = 1;
143 		if (setsockopt(la->fd, IPPROTO_TCP, TCP_MD5SIG,
144 		    &opt, sizeof(opt)) == -1) {
145 			if (errno == ENOPROTOOPT) {	/* system w/o md5sig */
146 				log_warnx("md5sig not available, disabling");
147 				sysdep.no_md5sig = 1;
148 			} else
149 				fatal("setsockopt TCP_MD5SIG");
150 		}
151 
152 		/* set ttl to 255 so that ttl-security works */
153 		if (la->sa.ss_family == AF_INET && setsockopt(la->fd,
154 		    IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) {
155 			log_warn("setup_listeners setsockopt TTL");
156 			continue;
157 		}
158 
159 		session_socket_blockmode(la->fd, BM_NONBLOCK);
160 
161 		if (listen(la->fd, MAX_BACKLOG)) {
162 			close(la->fd);
163 			fatal("listen");
164 		}
165 
166 		la->flags |= LISTENER_LISTENING;
167 
168 		log_info("listening on %s",
169 		    log_sockaddr((struct sockaddr *)&la->sa));
170 	}
171 
172 	*la_cnt = cnt;
173 
174 	return (0);
175 }
176 
/*
 * Fork and run the session engine (SE).
 *
 * In the parent the function returns the child's pid right after fork().
 * The child never returns: it sets up the control sockets, chroots into the
 * home directory of BGPD_USER, drops privileges, prepares the listeners and
 * the imsg pipes, discards the configuration lists it has no use for and
 * then runs the poll() loop until session_quit is raised.  After the loop
 * it tears everything down and leaves through _exit(0).
 *
 * config, cpeers    become the global conf and peers
 * net_l, rules, m_l emptied here (see the comments below)
 * pipe_*            the SE keeps pipe_m2s[1], pipe_s2r[0] and
 *                   pipe_s2rctl[0]; the other ends and pipe_m2r are closed
 */
pid_t
session_main(struct bgpd_config *config, struct peer *cpeers,
    struct network_head *net_l, struct filter_head *rules,
    struct mrt_head *m_l, int pipe_m2s[2], int pipe_s2r[2], int pipe_m2r[2],
    int pipe_s2rctl[2])
{
	int			 nfds, timeout;
	unsigned int		 i, j, idx_peers, idx_listeners, idx_mrts;
	pid_t			 pid;
	u_int			 pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0;
	u_int			 listener_cnt, ctl_cnt, mrt_cnt;
	u_int			 new_cnt;
	u_int32_t		 ctl_queued;
	struct passwd		*pw;
	struct peer		*p, **peer_l = NULL, *last, *next;
	struct network		*net;
	struct mrt		*m, **mrt_l = NULL;
	struct filter_rule	*r;
	struct pollfd		*pfd = NULL;
	struct ctl_conn		*ctl_conn;
	struct listen_addr	*la;
	void			*newp;
	short			 events;

	conf = config;
	peers = cpeers;

	/* only the child continues below; the parent gets the pid back */
	switch (pid = fork()) {
	case -1:
		fatal("cannot fork");
	case 0:
		break;
	default:
		return (pid);
	}

	/* control socket is outside chroot */
	if ((csock = control_init(0, conf->csock)) == -1)
		fatalx("control socket setup failed");
	if (conf->rcsock != NULL &&
	    (rcsock = control_init(1, conf->rcsock)) == -1)
		fatalx("control socket setup failed");

	if ((pw = getpwnam(BGPD_USER)) == NULL)
		fatal(NULL);

	if (chroot(pw->pw_dir) == -1)
		fatal("chroot");
	if (chdir("/") == -1)
		fatal("chdir(\"/\")");

	setproctitle("session engine");
	bgpd_process = PROC_SE;

	if (pfkey_init(&sysdep) == -1)
		fatalx("pfkey setup failed");

	/* drop privileges: groups first, then gid, then uid */
	if (setgroups(1, &pw->pw_gid) ||
	    setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) ||
	    setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
		fatal("can't drop privileges");

	listener_cnt = 0;
	setup_listeners(&listener_cnt);

	signal(SIGTERM, session_sighdlr);
	signal(SIGINT, session_sighdlr);
	signal(SIGPIPE, SIG_IGN);
	signal(SIGHUP, SIG_IGN);
	log_info("session engine ready");
	/* close the pipe ends this process does not use */
	close(pipe_m2s[0]);
	close(pipe_s2r[1]);
	close(pipe_s2rctl[1]);
	close(pipe_m2r[0]);
	close(pipe_m2r[1]);
	init_conf(conf);
	if ((ibuf_rde = malloc(sizeof(struct imsgbuf))) == NULL ||
	    (ibuf_rde_ctl = malloc(sizeof(struct imsgbuf))) == NULL ||
	    (ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL)
		fatal(NULL);
	imsg_init(ibuf_rde, pipe_s2r[0]);
	imsg_init(ibuf_rde_ctl, pipe_s2rctl[0]);
	imsg_init(ibuf_main, pipe_m2s[1]);
	TAILQ_INIT(&ctl_conns);
	control_listen(csock);
	/*
	 * NOTE(review): rcsock is still -1 when conf->rcsock is NULL;
	 * presumably control_listen() copes with that -- confirm.
	 */
	control_listen(rcsock);
	LIST_INIT(&mrthead);
	peer_cnt = 0;
	ctl_cnt = 0;

	/* filter rules are not used in the SE */
	while ((r = TAILQ_FIRST(rules)) != NULL) {
		TAILQ_REMOVE(rules, r, entry);
		free(r);
	}
	free(rules);

	/* network list is not used in the SE */
	while ((net = TAILQ_FIRST(net_l)) != NULL) {
		TAILQ_REMOVE(net_l, net, entry);
		filterset_free(&net->net.attrset);
		free(net);
	}

	/* main mrt list is not used in the SE */
	while ((m = LIST_FIRST(m_l)) != NULL) {
		LIST_REMOVE(m, entry);
		free(m);
	}

	while (session_quit == 0) {
		/* check for peers to be initialized or deleted */
		last = NULL;
		for (p = peers; p != NULL; p = next) {
			/* p may be freed below, so fetch the successor now */
			next = p->next;
			if (!pending_reconf) {
				/* cloned peer that idled out? */
				if (p->state == STATE_IDLE && p->conf.cloned &&
				    time(NULL) - p->stats.last_updown >=
				    INTERVAL_HOLD_CLONED)
					p->conf.reconf_action = RECONF_DELETE;

				/* new peer that needs init? */
				if (p->state == STATE_NONE)
					init_peer(p);

				/* reinit due? */
				if (p->conf.reconf_action == RECONF_REINIT) {
					bgp_fsm(p, EVNT_STOP);
					timer_set(p, Timer_IdleHold, 0);
				}

				/* deletion due? */
				if (p->conf.reconf_action == RECONF_DELETE) {
					if (p->demoted)
						session_demote(p, -1);
					p->conf.demote_group[0] = 0;
					bgp_fsm(p, EVNT_STOP);
					log_peer_warnx(&p->conf, "removed");
					/* unlink p from the singly linked list */
					if (last != NULL)
						last->next = next;
					else
						peers = next;
					/*
					 * NOTE(review): unlike the shutdown
					 * path below, pfkey_remove() is not
					 * called before free() -- confirm
					 * this is intended.
					 */
					timer_remove_all(p);
					free(p);
					peer_cnt--;
					continue;
				}
				p->conf.reconf_action = RECONF_NONE;
			}
			last = p;
		}

		/* the helper arrays below only ever grow */
		if (peer_cnt > peer_l_elms) {
			if ((newp = realloc(peer_l, sizeof(struct peer *) *
			    peer_cnt)) == NULL) {
				/* panic for now  */
				log_warn("could not resize peer_l from %u -> %u"
				    " entries", peer_l_elms, peer_cnt);
				fatalx("exiting");
			}
			peer_l = newp;
			peer_l_elms = peer_cnt;
		}

		mrt_cnt = 0;
		LIST_FOREACH(m, &mrthead, entry)
			if (m->queued)
				mrt_cnt++;

		if (mrt_cnt > mrt_l_elms) {
			if ((newp = realloc(mrt_l, sizeof(struct mrt *) *
			    mrt_cnt)) == NULL) {
				/* panic for now  */
				log_warn("could not resize mrt_l from %u -> %u"
				    " entries", mrt_l_elms, mrt_cnt);
				fatalx("exiting");
			}
			mrt_l = newp;
			mrt_l_elms = mrt_cnt;
		}

		new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt +
		    ctl_cnt + mrt_cnt;
		if (new_cnt > pfd_elms) {
			if ((newp = realloc(pfd, sizeof(struct pollfd) *
			    new_cnt)) == NULL) {
				/* panic for now  */
				log_warn("could not resize pfd from %u -> %u"
				    " entries", pfd_elms, new_cnt);
				fatalx("exiting");
			}
			pfd = newp;
			pfd_elms = new_cnt;
		}

		/* rebuild the poll set from scratch: fixed slots first */
		bzero(pfd, sizeof(struct pollfd) * pfd_elms);
		pfd[PFD_PIPE_MAIN].fd = ibuf_main->fd;
		pfd[PFD_PIPE_MAIN].events = POLLIN;
		if (ibuf_main->w.queued > 0)
			pfd[PFD_PIPE_MAIN].events |= POLLOUT;
		pfd[PFD_PIPE_ROUTE].fd = ibuf_rde->fd;
		pfd[PFD_PIPE_ROUTE].events = POLLIN;
		if (ibuf_rde->w.queued > 0)
			pfd[PFD_PIPE_ROUTE].events |= POLLOUT;

		ctl_queued = 0;
		TAILQ_FOREACH(ctl_conn, &ctl_conns, entry)
			ctl_queued += ctl_conn->ibuf.w.queued;

		pfd[PFD_PIPE_ROUTE_CTL].fd = ibuf_rde_ctl->fd;
		if (ctl_queued < SESSION_CTL_QUEUE_MAX)
			/*
			 * Do not act as unlimited buffer. Don't read in more
			 * messages if the ctl sockets are getting full.
			 */
			pfd[PFD_PIPE_ROUTE_CTL].events = POLLIN;
		pfd[PFD_SOCK_CTL].fd = csock;
		pfd[PFD_SOCK_CTL].events = POLLIN;
		pfd[PFD_SOCK_RCTL].fd = rcsock;
		pfd[PFD_SOCK_RCTL].events = POLLIN;

		/* listeners follow the fixed slots */
		i = PFD_LISTENERS_START;
		TAILQ_FOREACH(la, conf->listen_addrs, entry) {
			pfd[i].fd = la->fd;
			pfd[i].events = POLLIN;
			i++;
		}
		idx_listeners = i;
		timeout = 240;	/* loop every 240s at least */

		/* run due timers and add the peers' sockets to the poll set */
		for (p = peers; p != NULL; p = p->next) {
			time_t	nextaction;
			struct peer_timer *pt;

			/* check timers */
			if ((pt = timer_nextisdue(p)) != NULL) {
				switch (pt->type) {
				case Timer_Hold:
					bgp_fsm(p, EVNT_TIMER_HOLDTIME);
					break;
				case Timer_ConnectRetry:
					bgp_fsm(p, EVNT_TIMER_CONNRETRY);
					break;
				case Timer_Keepalive:
					bgp_fsm(p, EVNT_TIMER_KEEPALIVE);
					break;
				case Timer_IdleHold:
					bgp_fsm(p, EVNT_START);
					break;
				case Timer_IdleHoldReset:
					/*
					 * halve the idle hold time each time
					 * this timer fires until it is back
					 * at its initial value
					 */
					p->IdleHoldTime /= 2;
					if (p->IdleHoldTime <=
					    INTERVAL_IDLE_HOLD_INITIAL) {
						p->IdleHoldTime =
						    INTERVAL_IDLE_HOLD_INITIAL;
						timer_stop(p,
						    Timer_IdleHoldReset);
						p->errcnt = 0;
					} else
						timer_set(p,
						    Timer_IdleHoldReset,
						    p->IdleHoldTime);
					break;
				case Timer_CarpUndemote:
					timer_stop(p, Timer_CarpUndemote);
					if (p->demoted &&
					    p->state == STATE_ESTABLISHED)
						session_demote(p, -1);
					break;
				default:
					fatalx("King Bula lost in time");
				}
			}
			/* never sleep past this peer's next timer */
			if ((nextaction = timer_nextduein(p)) != -1 &&
			    nextaction < timeout)
				timeout = nextaction;

			/* are we waiting for a write? */
			events = POLLIN;
			if (p->wbuf.queued > 0 || p->state == STATE_CONNECT)
				events |= POLLOUT;

			/* poll events */
			if (p->fd != -1 && events != 0) {
				pfd[i].fd = p->fd;
				pfd[i].events = events;
				/* remember which peer owns this pfd slot */
				peer_l[i - idx_listeners] = p;
				i++;
			}
		}

		idx_peers = i;

		/* mrt dumps with queued data wait for writability */
		LIST_FOREACH(m, &mrthead, entry)
			if (m->queued) {
				pfd[i].fd = m->fd;
				pfd[i].events = POLLOUT;
				mrt_l[i - idx_peers] = m;
				i++;
			}

		idx_mrts = i;

		/* control connections come last */
		TAILQ_FOREACH(ctl_conn, &ctl_conns, entry) {
			pfd[i].fd = ctl_conn->ibuf.fd;
			pfd[i].events = POLLIN;
			if (ctl_conn->ibuf.w.queued > 0)
				pfd[i].events |= POLLOUT;
			i++;
		}

		if (timeout < 0)
			timeout = 0;
		/* timeout is kept in seconds, poll() wants milliseconds */
		if ((nfds = poll(pfd, i, timeout * 1000)) == -1)
			if (errno != EINTR)
				fatal("poll error");

		/*
		 * Dispatch in the same order the poll set was built; nfds
		 * counts the descriptors still to be handled so the walk can
		 * stop early.
		 */
		if (nfds > 0 && pfd[PFD_PIPE_MAIN].revents & POLLOUT)
			if (msgbuf_write(&ibuf_main->w) < 0)
				fatal("pipe write error");

		if (nfds > 0 && pfd[PFD_PIPE_MAIN].revents & POLLIN) {
			nfds--;
			session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN,
			    &listener_cnt);
		}

		if (nfds > 0 && pfd[PFD_PIPE_ROUTE].revents & POLLOUT)
			if (msgbuf_write(&ibuf_rde->w) < 0)
				fatal("pipe write error");

		if (nfds > 0 && pfd[PFD_PIPE_ROUTE].revents & POLLIN) {
			nfds--;
			session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE,
			    &listener_cnt);
		}

		if (nfds > 0 && pfd[PFD_PIPE_ROUTE_CTL].revents & POLLIN) {
			nfds--;
			session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL,
			    &listener_cnt);
		}

		if (nfds > 0 && pfd[PFD_SOCK_CTL].revents & POLLIN) {
			nfds--;
			ctl_cnt += control_accept(csock, 0);
		}

		if (nfds > 0 && pfd[PFD_SOCK_RCTL].revents & POLLIN) {
			nfds--;
			ctl_cnt += control_accept(rcsock, 1);
		}

		for (j = PFD_LISTENERS_START; nfds > 0 && j < idx_listeners;
		    j++)
			if (pfd[j].revents & POLLIN) {
				nfds--;
				session_accept(pfd[j].fd);
			}

		for (; nfds > 0 && j < idx_peers; j++)
			nfds -= session_dispatch_msg(&pfd[j],
			    peer_l[j - idx_listeners]);

		for (; nfds > 0 && j < idx_mrts; j++)
			if (pfd[j].revents & POLLOUT) {
				nfds--;
				mrt_write(mrt_l[j - idx_peers]);
			}

		for (; nfds > 0 && j < i; j++)
			nfds -= control_dispatch_msg(&pfd[j], &ctl_cnt);
	}

	/* shutdown: stop every peer and release what was allocated */
	while ((p = peers) != NULL) {
		peers = p->next;
		bgp_fsm(p, EVNT_STOP);
		pfkey_remove(p);
		free(p);
	}

	while ((m = LIST_FIRST(&mrthead)) != NULL) {
		mrt_clean(m);
		LIST_REMOVE(m, entry);
		free(m);
	}

	while ((la = TAILQ_FIRST(conf->listen_addrs)) != NULL) {
		TAILQ_REMOVE(conf->listen_addrs, la, entry);
		free(la);
	}
	free(conf->listen_addrs);
	free(peer_l);
	free(mrt_l);
	free(pfd);

	/* one last attempt to write out queued imsgs, result ignored */
	msgbuf_write(&ibuf_rde->w);
	msgbuf_clear(&ibuf_rde->w);
	free(ibuf_rde);
	msgbuf_write(&ibuf_main->w);
	msgbuf_clear(&ibuf_main->w);
	free(ibuf_main);

	control_shutdown(csock);
	control_shutdown(rcsock);
	log_info("session engine exiting");
	_exit(0);
}
586 
587 void
588 init_conf(struct bgpd_config *c)
589 {
590 	if (!c->holdtime)
591 		c->holdtime = INTERVAL_HOLD;
592 }
593 
594 void
595 init_peer(struct peer *p)
596 {
597 	TAILQ_INIT(&p->timers);
598 	p->fd = p->wbuf.fd = -1;
599 
600 	if (p->conf.if_depend[0])
601 		imsg_compose(ibuf_main, IMSG_IFINFO, 0, 0, -1,
602 		    p->conf.if_depend, sizeof(p->conf.if_depend));
603 	else
604 		p->depend_ok = 1;
605 
606 	peer_cnt++;
607 
608 	change_state(p, STATE_IDLE, EVNT_NONE);
609 	if (p->conf.down)
610 		timer_stop(p, Timer_IdleHold);		/* no autostart */
611 	else
612 		timer_set(p, Timer_IdleHold, 0);	/* start ASAP */
613 
614 	/*
615 	 * on startup, demote if requested.
616 	 * do not handle new peers. they must reach ESTABLISHED beforehands.
617 	 * peers added at runtime have reconf_action set to RECONF_REINIT.
618 	 */
619 	if (p->conf.reconf_action != RECONF_REINIT && p->conf.demote_group[0])
620 		session_demote(p, +1);
621 }
622 
623 void
624 bgp_fsm(struct peer *peer, enum session_events event)
625 {
626 	switch (peer->state) {
627 	case STATE_NONE:
628 		/* nothing */
629 		break;
630 	case STATE_IDLE:
631 		switch (event) {
632 		case EVNT_START:
633 			timer_stop(peer, Timer_Hold);
634 			timer_stop(peer, Timer_Keepalive);
635 			timer_stop(peer, Timer_IdleHold);
636 
637 			/* allocate read buffer */
638 			peer->rbuf = calloc(1, sizeof(struct buf_read));
639 			if (peer->rbuf == NULL)
640 				fatal(NULL);
641 			peer->rbuf->wpos = 0;
642 
643 			/* init write buffer */
644 			msgbuf_init(&peer->wbuf);
645 
646 			/* init pfkey - remove old if any, load new ones */
647 			pfkey_remove(peer);
648 			if (pfkey_establish(peer) == -1) {
649 				log_peer_warnx(&peer->conf,
650 				    "pfkey setup failed");
651 				return;
652 			}
653 
654 			peer->stats.last_sent_errcode = 0;
655 			peer->stats.last_sent_suberr = 0;
656 
657 			if (!peer->depend_ok)
658 				timer_stop(peer, Timer_ConnectRetry);
659 			else if (peer->passive || peer->conf.passive ||
660 			    peer->conf.template) {
661 				change_state(peer, STATE_ACTIVE, event);
662 				timer_stop(peer, Timer_ConnectRetry);
663 			} else {
664 				change_state(peer, STATE_CONNECT, event);
665 				timer_set(peer, Timer_ConnectRetry,
666 				    INTERVAL_CONNECTRETRY);
667 				session_connect(peer);
668 			}
669 			peer->passive = 0;
670 			break;
671 		default:
672 			/* ignore */
673 			break;
674 		}
675 		break;
676 	case STATE_CONNECT:
677 		switch (event) {
678 		case EVNT_START:
679 			/* ignore */
680 			break;
681 		case EVNT_CON_OPEN:
682 			session_tcp_established(peer);
683 			session_open(peer);
684 			timer_stop(peer, Timer_ConnectRetry);
685 			peer->holdtime = INTERVAL_HOLD_INITIAL;
686 			start_timer_holdtime(peer);
687 			change_state(peer, STATE_OPENSENT, event);
688 			break;
689 		case EVNT_CON_OPENFAIL:
690 			timer_set(peer, Timer_ConnectRetry,
691 			    INTERVAL_CONNECTRETRY);
692 			session_close_connection(peer);
693 			change_state(peer, STATE_ACTIVE, event);
694 			break;
695 		case EVNT_TIMER_CONNRETRY:
696 			timer_set(peer, Timer_ConnectRetry,
697 			    INTERVAL_CONNECTRETRY);
698 			session_connect(peer);
699 			break;
700 		default:
701 			change_state(peer, STATE_IDLE, event);
702 			break;
703 		}
704 		break;
705 	case STATE_ACTIVE:
706 		switch (event) {
707 		case EVNT_START:
708 			/* ignore */
709 			break;
710 		case EVNT_CON_OPEN:
711 			session_tcp_established(peer);
712 			session_open(peer);
713 			timer_stop(peer, Timer_ConnectRetry);
714 			peer->holdtime = INTERVAL_HOLD_INITIAL;
715 			start_timer_holdtime(peer);
716 			change_state(peer, STATE_OPENSENT, event);
717 			break;
718 		case EVNT_CON_OPENFAIL:
719 			timer_set(peer, Timer_ConnectRetry,
720 			    INTERVAL_CONNECTRETRY);
721 			session_close_connection(peer);
722 			change_state(peer, STATE_ACTIVE, event);
723 			break;
724 		case EVNT_TIMER_CONNRETRY:
725 			timer_set(peer, Timer_ConnectRetry,
726 			    peer->holdtime);
727 			change_state(peer, STATE_CONNECT, event);
728 			session_connect(peer);
729 			break;
730 		default:
731 			change_state(peer, STATE_IDLE, event);
732 			break;
733 		}
734 		break;
735 	case STATE_OPENSENT:
736 		switch (event) {
737 		case EVNT_START:
738 			/* ignore */
739 			break;
740 		case EVNT_STOP:
741 			session_notification(peer, ERR_CEASE, 0, NULL, 0);
742 			change_state(peer, STATE_IDLE, event);
743 			break;
744 		case EVNT_CON_CLOSED:
745 			session_close_connection(peer);
746 			timer_set(peer, Timer_ConnectRetry,
747 			    INTERVAL_CONNECTRETRY);
748 			change_state(peer, STATE_ACTIVE, event);
749 			break;
750 		case EVNT_CON_FATAL:
751 			change_state(peer, STATE_IDLE, event);
752 			break;
753 		case EVNT_TIMER_HOLDTIME:
754 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
755 			    0, NULL, 0);
756 			change_state(peer, STATE_IDLE, event);
757 			break;
758 		case EVNT_RCVD_OPEN:
759 			/* parse_open calls change_state itself on failure */
760 			if (parse_open(peer))
761 				break;
762 			session_keepalive(peer);
763 			change_state(peer, STATE_OPENCONFIRM, event);
764 			break;
765 		case EVNT_RCVD_NOTIFICATION:
766 			if (parse_notification(peer)) {
767 				change_state(peer, STATE_IDLE, event);
768 				/* don't punish, capa negotiation */
769 				timer_set(peer, Timer_IdleHold, 0);
770 				peer->IdleHoldTime /= 2;
771 			} else
772 				change_state(peer, STATE_IDLE, event);
773 			break;
774 		default:
775 			session_notification(peer, ERR_FSM, 0, NULL, 0);
776 			change_state(peer, STATE_IDLE, event);
777 			break;
778 		}
779 		break;
780 	case STATE_OPENCONFIRM:
781 		switch (event) {
782 		case EVNT_START:
783 			/* ignore */
784 			break;
785 		case EVNT_STOP:
786 			session_notification(peer, ERR_CEASE, 0, NULL, 0);
787 			change_state(peer, STATE_IDLE, event);
788 			break;
789 		case EVNT_CON_CLOSED:
790 		case EVNT_CON_FATAL:
791 			change_state(peer, STATE_IDLE, event);
792 			break;
793 		case EVNT_TIMER_HOLDTIME:
794 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
795 			    0, NULL, 0);
796 			change_state(peer, STATE_IDLE, event);
797 			break;
798 		case EVNT_TIMER_KEEPALIVE:
799 			session_keepalive(peer);
800 			break;
801 		case EVNT_RCVD_KEEPALIVE:
802 			start_timer_holdtime(peer);
803 			change_state(peer, STATE_ESTABLISHED, event);
804 			break;
805 		case EVNT_RCVD_NOTIFICATION:
806 			parse_notification(peer);
807 			change_state(peer, STATE_IDLE, event);
808 			break;
809 		default:
810 			session_notification(peer, ERR_FSM, 0, NULL, 0);
811 			change_state(peer, STATE_IDLE, event);
812 			break;
813 		}
814 		break;
815 	case STATE_ESTABLISHED:
816 		switch (event) {
817 		case EVNT_START:
818 			/* ignore */
819 			break;
820 		case EVNT_STOP:
821 			session_notification(peer, ERR_CEASE, 0, NULL, 0);
822 			change_state(peer, STATE_IDLE, event);
823 			break;
824 		case EVNT_CON_CLOSED:
825 		case EVNT_CON_FATAL:
826 			change_state(peer, STATE_IDLE, event);
827 			break;
828 		case EVNT_TIMER_HOLDTIME:
829 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
830 			    0, NULL, 0);
831 			change_state(peer, STATE_IDLE, event);
832 			break;
833 		case EVNT_TIMER_KEEPALIVE:
834 			session_keepalive(peer);
835 			break;
836 		case EVNT_RCVD_KEEPALIVE:
837 			start_timer_holdtime(peer);
838 			break;
839 		case EVNT_RCVD_UPDATE:
840 			start_timer_holdtime(peer);
841 			if (parse_update(peer))
842 				change_state(peer, STATE_IDLE, event);
843 			else
844 				start_timer_holdtime(peer);
845 			break;
846 		case EVNT_RCVD_NOTIFICATION:
847 			parse_notification(peer);
848 			change_state(peer, STATE_IDLE, event);
849 			break;
850 		default:
851 			session_notification(peer, ERR_FSM, 0, NULL, 0);
852 			change_state(peer, STATE_IDLE, event);
853 			break;
854 		}
855 		break;
856 	}
857 }
858 
859 void
860 start_timer_holdtime(struct peer *peer)
861 {
862 	if (peer->holdtime > 0)
863 		timer_set(peer, Timer_Hold, peer->holdtime);
864 	else
865 		timer_stop(peer, Timer_Hold);
866 }
867 
868 void
869 start_timer_keepalive(struct peer *peer)
870 {
871 	if (peer->holdtime > 0)
872 		timer_set(peer, Timer_Keepalive, peer->holdtime / 3);
873 	else
874 		timer_stop(peer, Timer_Keepalive);
875 }
876 
877 void
878 session_close_connection(struct peer *peer)
879 {
880 	if (peer->fd != -1)
881 		close(peer->fd);
882 
883 	peer->fd = peer->wbuf.fd = -1;
884 }
885 
/*
 * Move `peer` to `state` because of `event`.
 *
 * The work needed on entering the new state is done first; peer->state is
 * only updated at the very end, so every test of peer->state in here refers
 * to the state being left.  Entering STATE_IDLE tears the session down
 * (timers, connection, buffers) and schedules the next start attempt;
 * entering STATE_ESTABLISHED arms the idle-hold reset timer and announces
 * the session as up.  The transition is logged and passed to every matching
 * MRT_ALL_IN/MRT_ALL_OUT dump.
 */
void
change_state(struct peer *peer, enum session_state state,
    enum session_events event)
{
	struct mrt	*mrt;

	switch (state) {
	case STATE_IDLE:
		/* carp demotion first. new peers handled in init_peer */
		if (peer->state == STATE_ESTABLISHED &&
		    peer->conf.demote_group[0] && !peer->demoted)
			session_demote(peer, +1);

		/*
		 * try to write out what's buffered (maybe a notification),
		 * don't bother if it fails
		 */
		if (peer->state >= STATE_OPENSENT && peer->wbuf.queued)
			msgbuf_write(&peer->wbuf);

		/*
		 * we must start the timer for the next EVNT_START
		 * if we are coming here due to an error and the
		 * session was not established successfully before, the
		 * starttimerinterval needs to be exponentially increased
		 */
		if (peer->IdleHoldTime == 0)
			peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL;
		peer->holdtime = INTERVAL_HOLD_INITIAL;
		timer_stop(peer, Timer_ConnectRetry);
		timer_stop(peer, Timer_Keepalive);
		timer_stop(peer, Timer_Hold);
		timer_stop(peer, Timer_IdleHoldReset);
		session_close_connection(peer);
		msgbuf_clear(&peer->wbuf);
		/* rbuf is NULL from here until the next EVNT_START in IDLE */
		free(peer->rbuf);
		peer->rbuf = NULL;
		bzero(&peer->capa.peer, sizeof(peer->capa.peer));
		if (peer->state == STATE_ESTABLISHED)
			session_down(peer);
		/* an explicit stop does not schedule an automatic restart */
		if (event != EVNT_STOP) {
			timer_set(peer, Timer_IdleHold, peer->IdleHoldTime);
			/* back off: double the delay for the attempt after */
			if (event != EVNT_NONE &&
			    peer->IdleHoldTime < MAX_IDLE_HOLD/2)
				peer->IdleHoldTime *= 2;
		}
		if (peer->state == STATE_NONE ||
		    peer->state == STATE_ESTABLISHED) {
			/* initialize capability negotiation structures */
			memcpy(&peer->capa.ann, &peer->conf.capabilities,
			    sizeof(peer->capa.ann));
			if (!peer->conf.announce_capa)
				session_capa_ann_none(peer);
		}
		break;
	case STATE_CONNECT:
		break;
	case STATE_ACTIVE:
		break;
	case STATE_OPENSENT:
		break;
	case STATE_OPENCONFIRM:
		break;
	case STATE_ESTABLISHED:
		timer_set(peer, Timer_IdleHoldReset, peer->IdleHoldTime);
		if (peer->demoted)
			timer_set(peer, Timer_CarpUndemote,
			    INTERVAL_HOLD_DEMOTED);
		session_up(peer);
		break;
	default:		/* unknown target state: no entry action */
		break;
	}

	log_statechange(peer, state, event);
	LIST_FOREACH(mrt, &mrthead, entry) {
		if (mrt->type != MRT_ALL_IN && mrt->type != MRT_ALL_OUT)
			continue;
		/* dumps with neither peer nor group id match every peer */
		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
		    mrt->peer_id == peer->conf.id ||
		    mrt->group_id == peer->conf.groupid)
			mrt_dump_state(mrt, peer->state, state,
			    peer, conf);
	}
	peer->prev_state = peer->state;
	peer->state = state;
}
973 
974 void
975 session_accept(int listenfd)
976 {
977 	int			 connfd;
978 	int			 opt;
979 	socklen_t		 len;
980 	struct sockaddr_storage	 cliaddr;
981 	struct peer		*p = NULL;
982 
983 	len = sizeof(cliaddr);
984 	if ((connfd = accept(listenfd,
985 	    (struct sockaddr *)&cliaddr, &len)) == -1) {
986 		if (errno == EWOULDBLOCK || errno == EINTR)
987 			return;
988 		else
989 			log_warn("accept");
990 	}
991 
992 	p = getpeerbyip((struct sockaddr *)&cliaddr);
993 
994 	if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) {
995 		if (timer_running(p, Timer_IdleHold, NULL)) {
996 			/* fast reconnect after clear */
997 			p->passive = 1;
998 			bgp_fsm(p, EVNT_START);
999 		}
1000 	}
1001 
1002 	if (p != NULL &&
1003 	    (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) {
1004 		if (p->fd != -1) {
1005 			if (p->state == STATE_CONNECT)
1006 				session_close_connection(p);
1007 			else {
1008 				close(connfd);
1009 				return;
1010 			}
1011 		}
1012 
1013 		if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1014 			log_peer_warnx(&p->conf,
1015 			    "ipsec or md5sig configured but not available");
1016 			close(connfd);
1017 			return;
1018 		}
1019 
1020 		if (p->conf.auth.method == AUTH_MD5SIG) {
1021 			if (sysdep.no_md5sig) {
1022 				log_peer_warnx(&p->conf,
1023 				    "md5sig configured but not available");
1024 				close(connfd);
1025 				return;
1026 			}
1027 			len = sizeof(opt);
1028 			if (getsockopt(connfd, IPPROTO_TCP, TCP_MD5SIG,
1029 			    &opt, &len) == -1)
1030 				fatal("getsockopt TCP_MD5SIG");
1031 			if (!opt) {	/* non-md5'd connection! */
1032 				log_peer_warnx(&p->conf,
1033 				    "connection attempt without md5 signature");
1034 				close(connfd);
1035 				return;
1036 			}
1037 		}
1038 		p->fd = p->wbuf.fd = connfd;
1039 		if (session_setup_socket(p)) {
1040 			close(connfd);
1041 			return;
1042 		}
1043 		session_socket_blockmode(connfd, BM_NONBLOCK);
1044 		bgp_fsm(p, EVNT_CON_OPEN);
1045 	} else {
1046 		log_conn_attempt(p, (struct sockaddr *)&cliaddr);
1047 		close(connfd);
1048 	}
1049 }
1050 
1051 int
1052 session_connect(struct peer *peer)
1053 {
1054 	int			 opt = 1;
1055 	struct sockaddr		*sa;
1056 
1057 	/*
1058 	 * we do not need the overcomplicated collision detection RFC 1771
1059 	 * describes; we simply make sure there is only ever one concurrent
1060 	 * tcp connection per peer.
1061 	 */
1062 	if (peer->fd != -1)
1063 		return (-1);
1064 
1065 	if ((peer->fd = socket(peer->conf.remote_addr.af, SOCK_STREAM,
1066 	    IPPROTO_TCP)) == -1) {
1067 		log_peer_warn(&peer->conf, "session_connect socket");
1068 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1069 		return (-1);
1070 	}
1071 
1072 	if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1073 		log_peer_warnx(&peer->conf,
1074 		    "ipsec or md5sig configured but not available");
1075 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1076 		return (-1);
1077 	}
1078 
1079 	if (peer->conf.auth.method == AUTH_MD5SIG) {
1080 		if (sysdep.no_md5sig) {
1081 			log_peer_warnx(&peer->conf,
1082 			    "md5sig configured but not available");
1083 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1084 			return (-1);
1085 		}
1086 		if (setsockopt(peer->fd, IPPROTO_TCP, TCP_MD5SIG,
1087 		    &opt, sizeof(opt)) == -1) {
1088 			log_peer_warn(&peer->conf, "setsockopt md5sig");
1089 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1090 			return (-1);
1091 		}
1092 	}
1093 	peer->wbuf.fd = peer->fd;
1094 
1095 	/* if update source is set we need to bind() */
1096 	if (peer->conf.local_addr.af) {
1097 		sa = addr2sa(&peer->conf.local_addr, 0);
1098 		if (bind(peer->fd, sa, sa->sa_len) == -1) {
1099 			log_peer_warn(&peer->conf, "session_connect bind");
1100 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1101 			return (-1);
1102 		}
1103 	}
1104 
1105 	if (session_setup_socket(peer)) {
1106 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1107 		return (-1);
1108 	}
1109 
1110 	session_socket_blockmode(peer->fd, BM_NONBLOCK);
1111 
1112 	sa = addr2sa(&peer->conf.remote_addr, BGP_PORT);
1113 	if (connect(peer->fd, sa, sa->sa_len) == -1) {
1114 		if (errno != EINPROGRESS) {
1115 			if (errno != peer->lasterr)
1116 				log_peer_warn(&peer->conf, "connect");
1117 			peer->lasterr = errno;
1118 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1119 			return (-1);
1120 		}
1121 	} else
1122 		bgp_fsm(peer, EVNT_CON_OPEN);
1123 
1124 	return (0);
1125 }
1126 
1127 int
1128 session_setup_socket(struct peer *p)
1129 {
1130 	int	ttl = p->conf.distance;
1131 	int	pre = IPTOS_PREC_INTERNETCONTROL;
1132 	int	nodelay = 1;
1133 	int	bsize;
1134 
1135 	if (p->conf.ebgp && p->conf.remote_addr.af == AF_INET) {
1136 		/* set TTL to foreign router's distance - 1=direct n=multihop
1137 		   with ttlsec, we always use 255 */
1138 		if (p->conf.ttlsec) {
1139 			ttl = 256 - p->conf.distance;
1140 			if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL, &ttl,
1141 			    sizeof(ttl)) == -1) {
1142 				log_peer_warn(&p->conf,
1143 				    "session_setup_socket setsockopt MINTTL");
1144 				return (-1);
1145 			}
1146 			ttl = 255;
1147 		}
1148 
1149 		if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl,
1150 		    sizeof(ttl)) == -1) {
1151 			log_peer_warn(&p->conf,
1152 			    "session_setup_socket setsockopt TTL");
1153 			return (-1);
1154 		}
1155 	}
1156 
1157 	if (p->conf.ebgp && p->conf.remote_addr.af == AF_INET6)
1158 		/* set hoplimit to foreign router's distance */
1159 		if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl,
1160 		    sizeof(ttl)) == -1) {
1161 			log_peer_warn(&p->conf,
1162 			    "session_setup_socket setsockopt hoplimit");
1163 			return (-1);
1164 		}
1165 
1166 	/* if ttlsec is in use, set minttl */
1167 	if (p->conf.ttlsec) {
1168 		ttl = 256 - p->conf.distance;
1169 		setsockopt(p->fd, IPPROTO_IP, IP_MINTTL, &ttl, sizeof(ttl));
1170 
1171 	}
1172 
1173 	/* set TCP_NODELAY */
1174 	if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay,
1175 	    sizeof(nodelay)) == -1) {
1176 		log_peer_warn(&p->conf,
1177 		    "session_setup_socket setsockopt TCP_NODELAY");
1178 		return (-1);
1179 	}
1180 
1181 	/* set precedence, see RFC 1771 appendix 5 */
1182 	if (p->conf.remote_addr.af == AF_INET &&
1183 	    setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) == -1) {
1184 		log_peer_warn(&p->conf,
1185 		    "session_setup_socket setsockopt TOS");
1186 		return (-1);
1187 	}
1188 
1189 	/* only increase bufsize (and thus window) if md5 or ipsec is in use */
1190 	if (p->conf.auth.method != AUTH_NONE) {
1191 		/* try to increase bufsize. no biggie if it fails */
1192 		bsize = 65535;
1193 		while (setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize,
1194 		    sizeof(bsize)) == -1)
1195 			bsize /= 2;
1196 		bsize = 65535;
1197 		while (setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize,
1198 		    sizeof(bsize)) == -1)
1199 			bsize /= 2;
1200 	}
1201 
1202 	return (0);
1203 }
1204 
1205 void
1206 session_socket_blockmode(int fd, enum blockmodes bm)
1207 {
1208 	int	flags;
1209 
1210 	if ((flags = fcntl(fd, F_GETFL, 0)) == -1)
1211 		fatal("fcntl F_GETFL");
1212 
1213 	if (bm == BM_NONBLOCK)
1214 		flags |= O_NONBLOCK;
1215 	else
1216 		flags &= ~O_NONBLOCK;
1217 
1218 	if ((flags = fcntl(fd, F_SETFL, flags)) == -1)
1219 		fatal("fcntl F_SETFL");
1220 }
1221 
1222 void
1223 session_tcp_established(struct peer *peer)
1224 {
1225 	socklen_t	len;
1226 
1227 	len = sizeof(peer->sa_local);
1228 	if (getsockname(peer->fd, (struct sockaddr *)&peer->sa_local,
1229 	    &len) == -1)
1230 		log_warn("getsockname");
1231 	len = sizeof(peer->sa_remote);
1232 	if (getpeername(peer->fd, (struct sockaddr *)&peer->sa_remote,
1233 	    &len) == -1)
1234 		log_warn("getpeername");
1235 }
1236 
1237 void
1238 session_capa_ann_none(struct peer *peer)
1239 {
1240 	peer->capa.ann.mp_v4 = SAFI_NONE;
1241 	peer->capa.ann.mp_v4 = SAFI_NONE;
1242 	peer->capa.ann.refresh = 0;
1243 	peer->capa.ann.restart = 0;
1244 	peer->capa.ann.as4byte = 0;
1245 }
1246 
1247 int
1248 session_capa_add(struct peer *p, struct buf *opb, u_int8_t capa_code,
1249     u_int8_t capa_len, u_int8_t *optparamlen)
1250 {
1251 	u_int8_t	op_type, op_len, tot_len, errs = 0;
1252 
1253 	op_type = OPT_PARAM_CAPABILITIES;
1254 	op_len = sizeof(capa_code) + sizeof(capa_len) + capa_len;
1255 	tot_len = sizeof(op_type) + sizeof(op_len) + op_len;
1256 	if (buf_grow(opb, tot_len) == NULL)
1257 		return (1);
1258 	errs += buf_add(opb, &op_type, sizeof(op_type));
1259 	errs += buf_add(opb, &op_len, sizeof(op_len));
1260 	errs += buf_add(opb, &capa_code, sizeof(capa_code));
1261 	errs += buf_add(opb, &capa_len, sizeof(capa_len));
1262 	*optparamlen += tot_len;
1263 	return (errs);
1264 }
1265 
/*
 * Append the 4 byte multiprotocol capability value to buf:
 * AFI in network byte order, one zero byte, SAFI.
 * Returns 0 on success, non-zero if any buf_add() failed.
 */
int
session_capa_add_mp(struct buf *buf, u_int16_t afi, u_int8_t safi)
{
	u_int16_t	 nafi = htons(afi);
	u_int8_t	 reserved = 0;
	int		 failed;

	failed = buf_add(buf, &nafi, sizeof(nafi));
	failed += buf_add(buf, &reserved, sizeof(reserved));
	failed += buf_add(buf, &safi, sizeof(safi));

	return (failed);
}
1279 
1280 struct bgp_msg *
1281 session_newmsg(enum msg_type msgtype, u_int16_t len)
1282 {
1283 	struct bgp_msg		*msg;
1284 	struct msg_header	 hdr;
1285 	struct buf		*buf;
1286 	int			 errs = 0;
1287 
1288 	memset(&hdr.marker, 0xff, sizeof(hdr.marker));
1289 	hdr.len = htons(len);
1290 	hdr.type = msgtype;
1291 
1292 	if ((buf = buf_open(len)) == NULL)
1293 		return (NULL);
1294 
1295 	errs += buf_add(buf, &hdr.marker, sizeof(hdr.marker));
1296 	errs += buf_add(buf, &hdr.len, sizeof(hdr.len));
1297 	errs += buf_add(buf, &hdr.type, sizeof(hdr.type));
1298 
1299 	if (errs > 0 ||
1300 	    (msg = calloc(1, sizeof(*msg))) == NULL) {
1301 		buf_free(buf);
1302 		return (NULL);
1303 	}
1304 
1305 	msg->buf = buf;
1306 	msg->type = msgtype;
1307 	msg->len = len;
1308 
1309 	return (msg);
1310 }
1311 
1312 int
1313 session_sendmsg(struct bgp_msg *msg, struct peer *p)
1314 {
1315 	struct mrt		*mrt;
1316 
1317 	LIST_FOREACH(mrt, &mrthead, entry) {
1318 		if (mrt->type != MRT_ALL_OUT &&
1319 		    msg->type == UPDATE && mrt->type != MRT_UPDATE_OUT)
1320 			continue;
1321 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1322 		    mrt->peer_id == p->conf.id ||
1323 		    mrt->group_id == p->conf.groupid)
1324 			mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p, conf);
1325 	}
1326 
1327 	if (buf_close(&p->wbuf, msg->buf) == -1) {
1328 		log_peer_warn(&p->conf, "session_sendmsg buf_close");
1329 		buf_free(msg->buf);
1330 		free(msg);
1331 		return (-1);
1332 	}
1333 
1334 	free(msg);
1335 	return (0);
1336 }
1337 
1338 void
1339 session_open(struct peer *p)
1340 {
1341 	struct bgp_msg		*buf;
1342 	struct buf		*opb;
1343 	struct msg_open		 msg;
1344 	u_int16_t		 len;
1345 	u_int8_t		 optparamlen = 0;
1346 	u_int			 errs = 0;
1347 
1348 
1349 	if ((opb = buf_open(0)) == NULL) {
1350 		bgp_fsm(p, EVNT_CON_FATAL);
1351 		return;
1352 	}
1353 
1354 	/* multiprotocol extensions, RFC 4760 */
1355 	if (p->capa.ann.mp_v4) {	/* 4 bytes data */
1356 		errs += session_capa_add(p, opb, CAPA_MP, 4, &optparamlen);
1357 		errs += session_capa_add_mp(opb, AFI_IPv4, p->capa.ann.mp_v4);
1358 	}
1359 	if (p->capa.ann.mp_v6) {	/* 4 bytes data */
1360 		errs += session_capa_add(p, opb, CAPA_MP, 4, &optparamlen);
1361 		errs += session_capa_add_mp(opb, AFI_IPv6, p->capa.ann.mp_v6);
1362 	}
1363 
1364 	/* route refresh, RFC 2918 */
1365 	if (p->capa.ann.refresh)	/* no data */
1366 		errs += session_capa_add(p, opb, CAPA_REFRESH, 0, &optparamlen);
1367 
1368 	/* End-of-RIB marker, RFC 4724 */
1369 	if (p->capa.ann.restart) {	/* 2 bytes data */
1370 		u_char		c[2];
1371 
1372 		bzero(&c, 2);
1373 		c[0] = 0x80; /* we're always restarting */
1374 		errs += session_capa_add(p, opb, CAPA_RESTART, 2, &optparamlen);
1375 		errs += buf_add(opb, &c, 2);
1376 	}
1377 
1378 	/* 4-bytes AS numbers, draft-ietf-idr-as4bytes-13 */
1379 	if (p->capa.ann.as4byte) {	/* 4 bytes data */
1380 		u_int32_t	nas;
1381 
1382 		nas = htonl(conf->as);
1383 		errs += session_capa_add(p, opb, CAPA_AS4BYTE, 4, &optparamlen);
1384 		errs += buf_add(opb, &nas, 4);
1385 	}
1386 
1387 	len = MSGSIZE_OPEN_MIN + optparamlen;
1388 	if (errs || (buf = session_newmsg(OPEN, len)) == NULL) {
1389 		buf_free(opb);
1390 		bgp_fsm(p, EVNT_CON_FATAL);
1391 		return;
1392 	}
1393 
1394 	msg.version = 4;
1395 	if (conf->as > USHRT_MAX)
1396 		msg.myas = htons(conf->short_as);
1397 	else
1398 		msg.myas = htons(conf->as);
1399 	if (p->conf.holdtime)
1400 		msg.holdtime = htons(p->conf.holdtime);
1401 	else
1402 		msg.holdtime = htons(conf->holdtime);
1403 	msg.bgpid = conf->bgpid;	/* is already in network byte order */
1404 	msg.optparamlen = optparamlen;
1405 
1406 	errs += buf_add(buf->buf, &msg.version, sizeof(msg.version));
1407 	errs += buf_add(buf->buf, &msg.myas, sizeof(msg.myas));
1408 	errs += buf_add(buf->buf, &msg.holdtime, sizeof(msg.holdtime));
1409 	errs += buf_add(buf->buf, &msg.bgpid, sizeof(msg.bgpid));
1410 	errs += buf_add(buf->buf, &msg.optparamlen, sizeof(msg.optparamlen));
1411 
1412 	if (optparamlen)
1413 		errs += buf_add(buf->buf, opb->buf, optparamlen);
1414 
1415 	buf_free(opb);
1416 
1417 	if (errs > 0) {
1418 		buf_free(buf->buf);
1419 		free(buf);
1420 		bgp_fsm(p, EVNT_CON_FATAL);
1421 		return;
1422 	}
1423 
1424 	if (session_sendmsg(buf, p) == -1) {
1425 		bgp_fsm(p, EVNT_CON_FATAL);
1426 		return;
1427 	}
1428 
1429 	p->stats.msg_sent_open++;
1430 }
1431 
1432 void
1433 session_keepalive(struct peer *p)
1434 {
1435 	struct bgp_msg		*buf;
1436 
1437 	if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL ||
1438 	    session_sendmsg(buf, p) == -1) {
1439 		bgp_fsm(p, EVNT_CON_FATAL);
1440 		return;
1441 	}
1442 
1443 	start_timer_keepalive(p);
1444 	p->stats.msg_sent_keepalive++;
1445 }
1446 
1447 void
1448 session_update(u_int32_t peerid, void *data, size_t datalen)
1449 {
1450 	struct peer		*p;
1451 	struct bgp_msg		*buf;
1452 
1453 	if ((p = getpeerbyid(peerid)) == NULL) {
1454 		log_warnx("no such peer: id=%u", peerid);
1455 		return;
1456 	}
1457 
1458 	if (p->state != STATE_ESTABLISHED)
1459 		return;
1460 
1461 	if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + datalen)) == NULL) {
1462 		bgp_fsm(p, EVNT_CON_FATAL);
1463 		return;
1464 	}
1465 
1466 	if (buf_add(buf->buf, data, datalen)) {
1467 		buf_free(buf->buf);
1468 		free(buf);
1469 		bgp_fsm(p, EVNT_CON_FATAL);
1470 		return;
1471 	}
1472 
1473 	if (session_sendmsg(buf, p) == -1) {
1474 		bgp_fsm(p, EVNT_CON_FATAL);
1475 		return;
1476 	}
1477 
1478 	start_timer_keepalive(p);
1479 	p->stats.msg_sent_update++;
1480 }
1481 
1482 void
1483 session_notification(struct peer *p, u_int8_t errcode, u_int8_t subcode,
1484     void *data, ssize_t datalen)
1485 {
1486 	struct bgp_msg		*buf;
1487 	u_int			 errs = 0;
1488 	u_int8_t		 null8 = 0;
1489 
1490 	if (p->stats.last_sent_errcode)	/* some notification already sent */
1491 		return;
1492 
1493 	if ((buf = session_newmsg(NOTIFICATION,
1494 	    MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) {
1495 		bgp_fsm(p, EVNT_CON_FATAL);
1496 		return;
1497 	}
1498 
1499 	errs += buf_add(buf->buf, &errcode, sizeof(errcode));
1500 	if (errcode == ERR_CEASE)
1501 		errs += buf_add(buf->buf, &null8, sizeof(null8));
1502 	else
1503 		errs += buf_add(buf->buf, &subcode, sizeof(subcode));
1504 
1505 	if (datalen > 0)
1506 		errs += buf_add(buf->buf, data, datalen);
1507 
1508 	if (errs > 0) {
1509 		buf_free(buf->buf);
1510 		free(buf);
1511 		bgp_fsm(p, EVNT_CON_FATAL);
1512 		return;
1513 	}
1514 
1515 	if (session_sendmsg(buf, p) == -1) {
1516 		bgp_fsm(p, EVNT_CON_FATAL);
1517 		return;
1518 	}
1519 
1520 	p->stats.msg_sent_notification++;
1521 	p->stats.last_sent_errcode = errcode;
1522 	p->stats.last_sent_suberr = subcode;
1523 }
1524 
1525 int
1526 session_neighbor_rrefresh(struct peer *p)
1527 {
1528 	if (!p->capa.peer.refresh)
1529 		return (-1);
1530 
1531 	if (p->capa.peer.mp_v4 != SAFI_NONE)
1532 		session_rrefresh(p, AFI_IPv4, p->capa.peer.mp_v4);
1533 	if (p->capa.peer.mp_v6 != SAFI_NONE)
1534 		session_rrefresh(p, AFI_IPv6, p->capa.peer.mp_v6);
1535 
1536 	return (0);
1537 }
1538 
1539 void
1540 session_rrefresh(struct peer *p, u_int16_t afi, u_int8_t safi)
1541 {
1542 	struct bgp_msg		*buf;
1543 	int			 errs = 0;
1544 	u_int8_t		 null8 = 0;
1545 
1546 	if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) {
1547 		bgp_fsm(p, EVNT_CON_FATAL);
1548 		return;
1549 	}
1550 
1551 	afi = htons(afi);
1552 	errs += buf_add(buf->buf, &afi, sizeof(afi));
1553 	errs += buf_add(buf->buf, &null8, sizeof(null8));
1554 	errs += buf_add(buf->buf, &safi, sizeof(safi));
1555 
1556 	if (errs > 0) {
1557 		buf_free(buf->buf);
1558 		free(buf);
1559 		bgp_fsm(p, EVNT_CON_FATAL);
1560 		return;
1561 	}
1562 
1563 	if (session_sendmsg(buf, p) == -1) {
1564 		bgp_fsm(p, EVNT_CON_FATAL);
1565 		return;
1566 	}
1567 
1568 	p->stats.msg_sent_rrefresh++;
1569 }
1570 
1571 int
1572 session_dispatch_msg(struct pollfd *pfd, struct peer *p)
1573 {
1574 	ssize_t		n, rpos, av, left;
1575 	socklen_t	len;
1576 	int		error, processed = 0;
1577 	u_int16_t	msglen;
1578 	u_int8_t	msgtype;
1579 
1580 	if (p->state == STATE_CONNECT) {
1581 		if (pfd->revents & POLLOUT) {
1582 			if (pfd->revents & POLLIN) {
1583 				/* error occurred */
1584 				len = sizeof(error);
1585 				if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR,
1586 				    &error, &len) == -1 || error) {
1587 					if (error)
1588 						errno = error;
1589 					if (errno != p->lasterr) {
1590 						log_peer_warn(&p->conf,
1591 						    "socket error");
1592 						p->lasterr = errno;
1593 					}
1594 					bgp_fsm(p, EVNT_CON_OPENFAIL);
1595 					return (1);
1596 				}
1597 			}
1598 			bgp_fsm(p, EVNT_CON_OPEN);
1599 			return (1);
1600 		}
1601 		if (pfd->revents & POLLHUP) {
1602 			bgp_fsm(p, EVNT_CON_OPENFAIL);
1603 			return (1);
1604 		}
1605 		if (pfd->revents & (POLLERR|POLLNVAL)) {
1606 			bgp_fsm(p, EVNT_CON_FATAL);
1607 			return (1);
1608 		}
1609 		return (0);
1610 	}
1611 
1612 	if (pfd->revents & POLLHUP) {
1613 		bgp_fsm(p, EVNT_CON_CLOSED);
1614 		return (1);
1615 	}
1616 	if (pfd->revents & (POLLERR|POLLNVAL)) {
1617 		bgp_fsm(p, EVNT_CON_FATAL);
1618 		return (1);
1619 	}
1620 
1621 	if (pfd->revents & POLLOUT && p->wbuf.queued) {
1622 		if ((error = msgbuf_write(&p->wbuf)) < 0) {
1623 			if (error == -2)
1624 				log_peer_warnx(&p->conf, "Connection closed");
1625 			else
1626 				log_peer_warn(&p->conf, "write error");
1627 			bgp_fsm(p, EVNT_CON_FATAL);
1628 			return (1);
1629 		}
1630 		if (!(pfd->revents & POLLIN))
1631 			return (1);
1632 	}
1633 
1634 	if (p->rbuf && pfd->revents & POLLIN) {
1635 		if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos,
1636 		    sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) {
1637 			if (errno != EINTR && errno != EAGAIN) {
1638 				log_peer_warn(&p->conf, "read error");
1639 				bgp_fsm(p, EVNT_CON_FATAL);
1640 			}
1641 			return (1);
1642 		}
1643 		if (n == 0) {	/* connection closed */
1644 			bgp_fsm(p, EVNT_CON_CLOSED);
1645 			return (1);
1646 		}
1647 
1648 		rpos = 0;
1649 		av = p->rbuf->wpos + n;
1650 		p->stats.last_read = time(NULL);
1651 
1652 		/*
1653 		 * session might drop to IDLE -> buffers deallocated
1654 		 * we MUST check rbuf != NULL before use
1655 		 */
1656 		for (;;) {
1657 			if (rpos + MSGSIZE_HEADER > av)
1658 				break;
1659 			if (p->rbuf == NULL)
1660 				break;
1661 			if (parse_header(p, p->rbuf->buf + rpos, &msglen,
1662 			    &msgtype) == -1)
1663 				return (0);
1664 			if (rpos + msglen > av)
1665 				break;
1666 			p->rbuf->rptr = p->rbuf->buf + rpos;
1667 
1668 			switch (msgtype) {
1669 			case OPEN:
1670 				bgp_fsm(p, EVNT_RCVD_OPEN);
1671 				p->stats.msg_rcvd_open++;
1672 				break;
1673 			case UPDATE:
1674 				bgp_fsm(p, EVNT_RCVD_UPDATE);
1675 				p->stats.msg_rcvd_update++;
1676 				break;
1677 			case NOTIFICATION:
1678 				bgp_fsm(p, EVNT_RCVD_NOTIFICATION);
1679 				p->stats.msg_rcvd_notification++;
1680 				break;
1681 			case KEEPALIVE:
1682 				bgp_fsm(p, EVNT_RCVD_KEEPALIVE);
1683 				p->stats.msg_rcvd_keepalive++;
1684 				break;
1685 			case RREFRESH:
1686 				parse_refresh(p);
1687 				p->stats.msg_rcvd_rrefresh++;
1688 				break;
1689 			default:	/* cannot happen */
1690 				session_notification(p, ERR_HEADER,
1691 				    ERR_HDR_TYPE, &msgtype, 1);
1692 				log_warnx("received message with "
1693 				    "unknown type %u", msgtype);
1694 				bgp_fsm(p, EVNT_CON_FATAL);
1695 			}
1696 			rpos += msglen;
1697 			if (++processed > MSG_PROCESS_LIMIT)
1698 				break;
1699 		}
1700 		if (p->rbuf == NULL)
1701 			return (1);
1702 
1703 		if (rpos < av) {
1704 			left = av - rpos;
1705 			memcpy(&p->rbuf->buf, p->rbuf->buf + rpos, left);
1706 			p->rbuf->wpos = left;
1707 		} else
1708 			p->rbuf->wpos = 0;
1709 
1710 		return (1);
1711 	}
1712 	return (0);
1713 }
1714 
1715 int
1716 parse_header(struct peer *peer, u_char *data, u_int16_t *len, u_int8_t *type)
1717 {
1718 	struct mrt		*mrt;
1719 	u_char			*p;
1720 	u_int16_t		 olen;
1721 	static const u_int8_t	 marker[MSGSIZE_HEADER_MARKER] = { 0xff, 0xff,
1722 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1723 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
1724 
1725 	/* caller MUST make sure we are getting 19 bytes! */
1726 	p = data;
1727 	if (memcmp(p, marker, sizeof(marker))) {
1728 		log_peer_warnx(&peer->conf, "sync error");
1729 		session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL, 0);
1730 		bgp_fsm(peer, EVNT_CON_FATAL);
1731 		return (-1);
1732 	}
1733 	p += MSGSIZE_HEADER_MARKER;
1734 
1735 	memcpy(&olen, p, 2);
1736 	*len = ntohs(olen);
1737 	p += 2;
1738 	memcpy(type, p, 1);
1739 
1740 	if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) {
1741 		log_peer_warnx(&peer->conf,
1742 		    "received message: illegal length: %u byte", *len);
1743 		session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1744 		    &olen, sizeof(olen));
1745 		bgp_fsm(peer, EVNT_CON_FATAL);
1746 		return (-1);
1747 	}
1748 
1749 	switch (*type) {
1750 	case OPEN:
1751 		if (*len < MSGSIZE_OPEN_MIN) {
1752 			log_peer_warnx(&peer->conf,
1753 			    "received OPEN: illegal len: %u byte", *len);
1754 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1755 			    &olen, sizeof(olen));
1756 			bgp_fsm(peer, EVNT_CON_FATAL);
1757 			return (-1);
1758 		}
1759 		break;
1760 	case NOTIFICATION:
1761 		if (*len < MSGSIZE_NOTIFICATION_MIN) {
1762 			log_peer_warnx(&peer->conf,
1763 			    "received NOTIFICATION: illegal len: %u byte",
1764 			    *len);
1765 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1766 			    &olen, sizeof(olen));
1767 			bgp_fsm(peer, EVNT_CON_FATAL);
1768 			return (-1);
1769 		}
1770 		break;
1771 	case UPDATE:
1772 		if (*len < MSGSIZE_UPDATE_MIN) {
1773 			log_peer_warnx(&peer->conf,
1774 			    "received UPDATE: illegal len: %u byte", *len);
1775 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1776 			    &olen, sizeof(olen));
1777 			bgp_fsm(peer, EVNT_CON_FATAL);
1778 			return (-1);
1779 		}
1780 		break;
1781 	case KEEPALIVE:
1782 		if (*len != MSGSIZE_KEEPALIVE) {
1783 			log_peer_warnx(&peer->conf,
1784 			    "received KEEPALIVE: illegal len: %u byte", *len);
1785 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1786 			    &olen, sizeof(olen));
1787 			bgp_fsm(peer, EVNT_CON_FATAL);
1788 			return (-1);
1789 		}
1790 		break;
1791 	case RREFRESH:
1792 		if (*len != MSGSIZE_RREFRESH) {
1793 			log_peer_warnx(&peer->conf,
1794 			    "received RREFRESH: illegal len: %u byte", *len);
1795 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1796 			    &olen, sizeof(olen));
1797 			bgp_fsm(peer, EVNT_CON_FATAL);
1798 			return (-1);
1799 		}
1800 		break;
1801 	default:
1802 		log_peer_warnx(&peer->conf,
1803 		    "received msg with unknown type %u", *type);
1804 		session_notification(peer, ERR_HEADER, ERR_HDR_TYPE,
1805 		    type, 1);
1806 		bgp_fsm(peer, EVNT_CON_FATAL);
1807 		return (-1);
1808 	}
1809 	LIST_FOREACH(mrt, &mrthead, entry) {
1810 		if (mrt->type != MRT_ALL_IN && (mrt->type != MRT_UPDATE_IN ||
1811 		    *type != UPDATE))
1812 			continue;
1813 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1814 		    mrt->peer_id == peer->conf.id ||
1815 		    mrt->group_id == peer->conf.groupid)
1816 			mrt_dump_bgp_msg(mrt, data, *len, peer, conf);
1817 	}
1818 	return (0);
1819 }
1820 
1821 int
1822 parse_open(struct peer *peer)
1823 {
1824 	u_char		*p, *op_val;
1825 	u_int8_t	 version, rversion;
1826 	u_int16_t	 short_as, msglen;
1827 	u_int16_t	 holdtime, oholdtime, myholdtime;
1828 	u_int32_t	 as, bgpid;
1829 	u_int8_t	 optparamlen, plen;
1830 	u_int8_t	 op_type, op_len;
1831 
1832 	p = peer->rbuf->rptr;
1833 	p += MSGSIZE_HEADER_MARKER;
1834 	memcpy(&msglen, p, sizeof(msglen));
1835 	msglen = ntohs(msglen);
1836 
1837 	p = peer->rbuf->rptr;
1838 	p += MSGSIZE_HEADER;	/* header is already checked */
1839 
1840 	memcpy(&version, p, sizeof(version));
1841 	p += sizeof(version);
1842 
1843 	if (version != BGP_VERSION) {
1844 		log_peer_warnx(&peer->conf,
1845 		    "peer wants unrecognized version %u", version);
1846 		if (version > BGP_VERSION)
1847 			rversion = version - BGP_VERSION;
1848 		else
1849 			rversion = BGP_VERSION;
1850 		session_notification(peer, ERR_OPEN, ERR_OPEN_VERSION,
1851 		    &rversion, sizeof(rversion));
1852 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1853 		return (-1);
1854 	}
1855 
1856 	memcpy(&short_as, p, sizeof(short_as));
1857 	p += sizeof(short_as);
1858 	as = peer->short_as = ntohs(short_as);
1859 
1860 	/* if remote-as is zero and it's a cloned neighbor, accept any */
1861 	if (peer->conf.cloned && !peer->conf.remote_as && as != AS_TRANS) {
1862 		peer->conf.remote_as = as;
1863 		peer->conf.ebgp = (peer->conf.remote_as != conf->as);
1864 	}
1865 
1866 	memcpy(&oholdtime, p, sizeof(oholdtime));
1867 	p += sizeof(oholdtime);
1868 
1869 	holdtime = ntohs(oholdtime);
1870 	if (holdtime && holdtime < peer->conf.min_holdtime) {
1871 		log_peer_warnx(&peer->conf,
1872 		    "peer requests unacceptable holdtime %u", holdtime);
1873 		session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME,
1874 		    NULL, 0);
1875 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1876 		return (-1);
1877 	}
1878 
1879 	myholdtime = peer->conf.holdtime;
1880 	if (!myholdtime)
1881 		myholdtime = conf->holdtime;
1882 	if (holdtime < myholdtime)
1883 		peer->holdtime = holdtime;
1884 	else
1885 		peer->holdtime = myholdtime;
1886 
1887 	memcpy(&bgpid, p, sizeof(bgpid));
1888 	p += sizeof(bgpid);
1889 
1890 	/* check bgpid for validity - just disallow 0 */
1891 	if (ntohl(bgpid) == 0) {
1892 		log_peer_warnx(&peer->conf, "peer BGPID %lu unacceptable",
1893 		    ntohl(bgpid));
1894 		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID,
1895 		    NULL, 0);
1896 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1897 		return (-1);
1898 	}
1899 	peer->remote_bgpid = bgpid;
1900 
1901 	memcpy(&optparamlen, p, sizeof(optparamlen));
1902 	p += sizeof(optparamlen);
1903 
1904 	if (optparamlen != msglen - MSGSIZE_OPEN_MIN) {
1905 			log_peer_warnx(&peer->conf,
1906 			    "corrupt OPEN message received: length mismatch");
1907 			session_notification(peer, ERR_OPEN, 0, NULL, 0);
1908 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1909 			return (-1);
1910 	}
1911 
1912 	plen = optparamlen;
1913 	while (plen > 0) {
1914 		if (plen < 2) {
1915 			log_peer_warnx(&peer->conf,
1916 			    "corrupt OPEN message received, len wrong");
1917 			session_notification(peer, ERR_OPEN, 0, NULL, 0);
1918 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1919 			return (-1);
1920 		}
1921 		memcpy(&op_type, p, sizeof(op_type));
1922 		p += sizeof(op_type);
1923 		plen -= sizeof(op_type);
1924 		memcpy(&op_len, p, sizeof(op_len));
1925 		p += sizeof(op_len);
1926 		plen -= sizeof(op_len);
1927 		if (op_len > 0) {
1928 			if (plen < op_len) {
1929 				log_peer_warnx(&peer->conf,
1930 				    "corrupt OPEN message received, len wrong");
1931 				session_notification(peer, ERR_OPEN, 0,
1932 				    NULL, 0);
1933 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1934 				return (-1);
1935 			}
1936 			op_val = p;
1937 			p += op_len;
1938 			plen -= op_len;
1939 		} else
1940 			op_val = NULL;
1941 
1942 		switch (op_type) {
1943 		case OPT_PARAM_CAPABILITIES:		/* RFC 3392 */
1944 			if (parse_capabilities(peer, op_val, op_len,
1945 			    &as) == -1) {
1946 				session_notification(peer, ERR_OPEN, 0,
1947 				    NULL, 0);
1948 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1949 				return (-1);
1950 			}
1951 			break;
1952 		case OPT_PARAM_AUTH:			/* deprecated */
1953 		default:
1954 			/*
1955 			 * unsupported type
1956 			 * the RFCs tell us to leave the data section empty
1957 			 * and notify the peer with ERR_OPEN, ERR_OPEN_OPT.
1958 			 * How the peer should know _which_ optional parameter
1959 			 * we don't support is beyond me.
1960 			 */
1961 			log_peer_warnx(&peer->conf,
1962 			    "received OPEN message with unsupported optional "
1963 			    "parameter: type %u", op_type);
1964 			session_notification(peer, ERR_OPEN, ERR_OPEN_OPT,
1965 				NULL, 0);
1966 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1967 			timer_set(peer, Timer_IdleHold, 0);	/* no punish */
1968 			peer->IdleHoldTime /= 2;
1969 			return (-1);
1970 		}
1971 	}
1972 
1973 	if (peer->conf.remote_as != as) {
1974 		log_peer_warnx(&peer->conf, "peer sent wrong AS %s",
1975 		    log_as(as));
1976 		session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL, 0);
1977 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1978 		return (-1);
1979 	}
1980 
1981 	return (0);
1982 }
1983 
1984 int
1985 parse_update(struct peer *peer)
1986 {
1987 	u_char		*p;
1988 	u_int16_t	 datalen;
1989 
1990 	/*
1991 	 * we pass the message verbatim to the rde.
1992 	 * in case of errors the whole session is reset with a
1993 	 * notification anyway, we only need to know the peer
1994 	 */
1995 	p = peer->rbuf->rptr;
1996 	p += MSGSIZE_HEADER_MARKER;
1997 	memcpy(&datalen, p, sizeof(datalen));
1998 	datalen = ntohs(datalen);
1999 
2000 	p = peer->rbuf->rptr;
2001 	p += MSGSIZE_HEADER;	/* header is already checked */
2002 	datalen -= MSGSIZE_HEADER;
2003 
2004 	if (imsg_compose(ibuf_rde, IMSG_UPDATE, peer->conf.id, 0, -1, p,
2005 	    datalen) == -1)
2006 		return (-1);
2007 
2008 	return (0);
2009 }
2010 
2011 int
2012 parse_refresh(struct peer *peer)
2013 {
2014 	u_char		*p;
2015 	struct rrefresh	 r;
2016 
2017 	p = peer->rbuf->rptr;
2018 	p += MSGSIZE_HEADER;	/* header is already checked */
2019 
2020 	/* afi, 2 byte */
2021 	memcpy(&r.afi, p, sizeof(r.afi));
2022 	r.afi = ntohs(r.afi);
2023 	p += 2;
2024 	/* reserved, 1 byte */
2025 	p += 1;
2026 	/* safi, 1 byte */
2027 	memcpy(&r.safi, p, sizeof(r.safi));
2028 
2029 	/* afi/safi unchecked -	unrecognized values will be ignored anyway */
2030 
2031 	if (imsg_compose(ibuf_rde, IMSG_REFRESH, peer->conf.id, 0, -1, &r,
2032 	    sizeof(r)) == -1)
2033 		return (-1);
2034 
2035 	return (0);
2036 }
2037 
2038 int
2039 parse_notification(struct peer *peer)
2040 {
2041 	u_char		*p;
2042 	u_int8_t	 errcode;
2043 	u_int8_t	 subcode;
2044 	u_int16_t	 datalen;
2045 	u_int8_t	 capa_code;
2046 	u_int8_t	 capa_len;
2047 
2048 	/* just log */
2049 	p = peer->rbuf->rptr;
2050 	p += MSGSIZE_HEADER_MARKER;
2051 	memcpy(&datalen, p, sizeof(datalen));
2052 	datalen = ntohs(datalen);
2053 
2054 	p = peer->rbuf->rptr;
2055 	p += MSGSIZE_HEADER;	/* header is already checked */
2056 	datalen -= MSGSIZE_HEADER;
2057 
2058 	memcpy(&errcode, p, sizeof(errcode));
2059 	p += sizeof(errcode);
2060 	datalen -= sizeof(errcode);
2061 
2062 	memcpy(&subcode, p, sizeof(subcode));
2063 	p += sizeof(subcode);
2064 	datalen -= sizeof(subcode);
2065 
2066 	log_notification(peer, errcode, subcode, p, datalen);
2067 	peer->errcnt++;
2068 
2069 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_CAPA) {
2070 		if (datalen == 0) {	/* zebra likes to send those.. humbug */
2071 			log_peer_warnx(&peer->conf, "received \"unsupported "
2072 			    "capability\" notification without data part, "
2073 			    "disabling capability announcements altogether");
2074 			session_capa_ann_none(peer);
2075 		}
2076 
2077 		while (datalen > 0) {
2078 			if (datalen < 2) {
2079 				log_peer_warnx(&peer->conf,
2080 				    "parse_notification: "
2081 				    "expect len >= 2, len is %u", datalen);
2082 				return (-1);
2083 			}
2084 			memcpy(&capa_code, p, sizeof(capa_code));
2085 			p += sizeof(capa_code);
2086 			datalen -= sizeof(capa_code);
2087 			memcpy(&capa_len, p, sizeof(capa_len));
2088 			p += sizeof(capa_len);
2089 			datalen -= sizeof(capa_len);
2090 			if (datalen < capa_len) {
2091 				log_peer_warnx(&peer->conf,
2092 				    "parse_notification: capa_len %u exceeds "
2093 				    "remaining msg length %u", capa_len,
2094 				    datalen);
2095 				return (-1);
2096 			}
2097 			p += capa_len;
2098 			datalen -= capa_len;
2099 			switch (capa_code) {
2100 			case CAPA_MP:
2101 				peer->capa.ann.mp_v4 = SAFI_NONE;
2102 				peer->capa.ann.mp_v6 = SAFI_NONE;
2103 				log_peer_warnx(&peer->conf,
2104 				    "disabling multiprotocol capability");
2105 				break;
2106 			case CAPA_REFRESH:
2107 				peer->capa.ann.refresh = 0;
2108 				log_peer_warnx(&peer->conf,
2109 				    "disabling route refresh capability");
2110 				break;
2111 			case CAPA_RESTART:
2112 				peer->capa.ann.restart = 0;
2113 				log_peer_warnx(&peer->conf,
2114 				    "disabling restart capability");
2115 				break;
2116 			case CAPA_AS4BYTE:
2117 				peer->capa.ann.as4byte = 0;
2118 				log_peer_warnx(&peer->conf,
2119 				    "disabling 4-byte AS num capability");
2120 				break;
2121 			default:	/* should not happen... */
2122 				log_peer_warnx(&peer->conf, "received "
2123 				    "\"unsupported capability\" notification "
2124 				    "for unknown capability %u, disabling "
2125 				    "capability announcements altogether",
2126 				    capa_code);
2127 				session_capa_ann_none(peer);
2128 				break;
2129 			}
2130 		}
2131 
2132 		return (1);
2133 	}
2134 
2135 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_OPT) {
2136 		session_capa_ann_none(peer);
2137 		return (1);
2138 	}
2139 
2140 	return (0);
2141 }
2142 
2143 int
2144 parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as)
2145 {
2146 	u_int16_t	 len;
2147 	u_int8_t	 capa_code;
2148 	u_int8_t	 capa_len;
2149 	u_char		*capa_val;
2150 	u_int16_t	 mp_afi;
2151 	u_int8_t	 mp_safi;
2152 	u_int32_t	 remote_as;
2153 
2154 	len = dlen;
2155 	while (len > 0) {
2156 		if (len < 2) {
2157 			log_peer_warnx(&peer->conf, "parse_capabilities: "
2158 			    "expect len >= 2, len is %u", len);
2159 			return (-1);
2160 		}
2161 		memcpy(&capa_code, d, sizeof(capa_code));
2162 		d += sizeof(capa_code);
2163 		len -= sizeof(capa_code);
2164 		memcpy(&capa_len, d, sizeof(capa_len));
2165 		d += sizeof(capa_len);
2166 		len -= sizeof(capa_len);
2167 		if (capa_len > 0) {
2168 			if (len < capa_len) {
2169 				log_peer_warnx(&peer->conf,
2170 				    "parse_capabilities: "
2171 				    "len %u smaller than capa_len %u",
2172 				    len, capa_len);
2173 				return (-1);
2174 			}
2175 			capa_val = d;
2176 			d += capa_len;
2177 			len -= capa_len;
2178 		} else
2179 			capa_val = NULL;
2180 
2181 		switch (capa_code) {
2182 		case CAPA_MP:			/* RFC 4760 */
2183 			if (capa_len != 4) {
2184 				log_peer_warnx(&peer->conf,
2185 				    "parse_capabilities: "
2186 				    "expect len 4, len is %u", capa_len);
2187 				return (-1);
2188 			}
2189 			memcpy(&mp_afi, capa_val, sizeof(mp_afi));
2190 			mp_afi = ntohs(mp_afi);
2191 			memcpy(&mp_safi, capa_val + 3, sizeof(mp_safi));
2192 			switch (mp_afi) {
2193 			case AFI_IPv4:
2194 				if (mp_safi < 1 || mp_safi > 3)
2195 					log_peer_warnx(&peer->conf,
2196 					    "parse_capabilities: AFI IPv4, "
2197 					    "mp_safi %u unknown", mp_safi);
2198 				else
2199 					peer->capa.peer.mp_v4 = mp_safi;
2200 				break;
2201 			case AFI_IPv6:
2202 				if (mp_safi < 1 || mp_safi > 3)
2203 					log_peer_warnx(&peer->conf,
2204 					    "parse_capabilities: AFI IPv6, "
2205 					    "mp_safi %u unknown", mp_safi);
2206 				else
2207 					peer->capa.peer.mp_v6 = mp_safi;
2208 				break;
2209 			default:			/* ignore */
2210 				break;
2211 			}
2212 			break;
2213 		case CAPA_REFRESH:
2214 			peer->capa.peer.refresh = 1;
2215 			break;
2216 		case CAPA_RESTART:
2217 			peer->capa.peer.restart = 1;
2218 			/* we don't care about the further restart capas yet */
2219 			break;
2220 		case CAPA_AS4BYTE:
2221 			if (capa_len != 4) {
2222 				log_peer_warnx(&peer->conf,
2223 				    "parse_capabilities: "
2224 				    "expect len 4, len is %u", capa_len);
2225 				return (-1);
2226 			}
2227 			memcpy(&remote_as, capa_val, sizeof(remote_as));
2228 			*as = ntohl(remote_as);
2229 			peer->capa.peer.as4byte = 1;
2230 			break;
2231 		default:
2232 			break;
2233 		}
2234 	}
2235 
2236 	return (0);
2237 }
2238 
2239 void
2240 session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt)
2241 {
2242 	struct imsg		 imsg;
2243 	struct mrt		 xmrt;
2244 	struct mrt		*mrt;
2245 	struct peer_config	*pconf;
2246 	struct peer		*p, *next;
2247 	struct listen_addr	*la, *nla;
2248 	struct kif		*kif;
2249 	u_char			*data;
2250 	enum reconf_action	 reconf;
2251 	int			 n, depend_ok;
2252 	u_int8_t		 errcode, subcode;
2253 
2254 	if ((n = imsg_read(ibuf)) == -1)
2255 		fatal("session_dispatch_imsg: imsg_read error");
2256 
2257 	if (n == 0)	/* connection closed */
2258 		fatalx("session_dispatch_imsg: pipe closed");
2259 
2260 	for (;;) {
2261 		if ((n = imsg_get(ibuf, &imsg)) == -1)
2262 			fatal("session_dispatch_imsg: imsg_get error");
2263 
2264 		if (n == 0)
2265 			break;
2266 
2267 		switch (imsg.hdr.type) {
2268 		case IMSG_RECONF_CONF:
2269 			if (idx != PFD_PIPE_MAIN)
2270 				fatalx("reconf request not from parent");
2271 			if ((nconf = malloc(sizeof(struct bgpd_config))) ==
2272 			    NULL)
2273 				fatal(NULL);
2274 			memcpy(nconf, imsg.data, sizeof(struct bgpd_config));
2275 			if ((nconf->listen_addrs = calloc(1,
2276 			    sizeof(struct listen_addrs))) == NULL)
2277 				fatal(NULL);
2278 			TAILQ_INIT(nconf->listen_addrs);
2279 			npeers = NULL;
2280 			init_conf(nconf);
2281 			pending_reconf = 1;
2282 			break;
2283 		case IMSG_RECONF_PEER:
2284 			if (idx != PFD_PIPE_MAIN)
2285 				fatalx("reconf request not from parent");
2286 			pconf = imsg.data;
2287 			p = getpeerbyaddr(&pconf->remote_addr);
2288 			if (p == NULL) {
2289 				if ((p = calloc(1, sizeof(struct peer))) ==
2290 				    NULL)
2291 					fatal("new_peer");
2292 				p->state = p->prev_state = STATE_NONE;
2293 				p->next = npeers;
2294 				npeers = p;
2295 				reconf = RECONF_REINIT;
2296 			} else
2297 				reconf = RECONF_KEEP;
2298 
2299 			memcpy(&p->conf, pconf, sizeof(struct peer_config));
2300 			p->conf.reconf_action = reconf;
2301 			break;
2302 		case IMSG_RECONF_LISTENER:
2303 			if (idx != PFD_PIPE_MAIN)
2304 				fatalx("reconf request not from parent");
2305 			if (nconf == NULL)
2306 				fatalx("IMSG_RECONF_LISTENER but no config");
2307 			nla = imsg.data;
2308 			TAILQ_FOREACH(la, conf->listen_addrs, entry)
2309 				if (!la_cmp(la, nla))
2310 					break;
2311 
2312 			if (la == NULL) {
2313 				if (nla->reconf != RECONF_REINIT)
2314 					fatalx("king bula sez: "
2315 					    "expected REINIT");
2316 
2317 				if ((nla->fd = imsg_get_fd(ibuf)) == -1)
2318 					log_warnx("expected to receive fd for "
2319 					    "%s but didn't receive any",
2320 					    log_sockaddr((struct sockaddr *)
2321 					    &nla->sa));
2322 
2323 				la = calloc(1, sizeof(struct listen_addr));
2324 				if (la == NULL)
2325 					fatal(NULL);
2326 				memcpy(&la->sa, &nla->sa, sizeof(la->sa));
2327 				la->flags = nla->flags;
2328 				la->fd = nla->fd;
2329 				la->reconf = RECONF_REINIT;
2330 				TAILQ_INSERT_TAIL(nconf->listen_addrs, la,
2331 				    entry);
2332 			} else {
2333 				if (nla->reconf != RECONF_KEEP)
2334 					fatalx("king bula sez: expected KEEP");
2335 				la->reconf = RECONF_KEEP;
2336 			}
2337 
2338 			break;
2339 		case IMSG_RECONF_DONE:
2340 			if (idx != PFD_PIPE_MAIN)
2341 				fatalx("reconf request not from parent");
2342 			if (nconf == NULL)
2343 				fatalx("got IMSG_RECONF_DONE but no config");
2344 			conf->as = nconf->as;
2345 			conf->holdtime = nconf->holdtime;
2346 			conf->bgpid = nconf->bgpid;
2347 			conf->min_holdtime = nconf->min_holdtime;
2348 
2349 			/* add new peers */
2350 			for (p = npeers; p != NULL; p = next) {
2351 				next = p->next;
2352 				p->next = peers;
2353 				peers = p;
2354 			}
2355 			/* find ones that need attention */
2356 			for (p = peers; p != NULL; p = p->next) {
2357 				/* needs to be deleted? */
2358 				if (p->conf.reconf_action == RECONF_NONE &&
2359 				    !p->conf.cloned)
2360 					p->conf.reconf_action = RECONF_DELETE;
2361 				/* had demotion, is demoted, demote removed? */
2362 				if (p->demoted && !p->conf.demote_group[0])
2363 						session_demote(p, -1);
2364 			}
2365 
2366 			/* delete old listeners */
2367 			for (la = TAILQ_FIRST(conf->listen_addrs); la != NULL;
2368 			    la = nla) {
2369 				nla = TAILQ_NEXT(la, entry);
2370 				if (la->reconf == RECONF_NONE) {
2371 					log_info("not listening on %s any more",
2372 					    log_sockaddr(
2373 					    (struct sockaddr *)&la->sa));
2374 					TAILQ_REMOVE(conf->listen_addrs, la,
2375 					    entry);
2376 					close(la->fd);
2377 					free(la);
2378 				}
2379 			}
2380 
2381 			/* add new listeners */
2382 			while ((la = TAILQ_FIRST(nconf->listen_addrs)) !=
2383 			    NULL) {
2384 				TAILQ_REMOVE(nconf->listen_addrs, la, entry);
2385 				TAILQ_INSERT_TAIL(conf->listen_addrs, la,
2386 				    entry);
2387 			}
2388 
2389 			setup_listeners(listener_cnt);
2390 			free(nconf->listen_addrs);
2391 			free(nconf);
2392 			nconf = NULL;
2393 			pending_reconf = 0;
2394 			log_info("SE reconfigured");
2395 			break;
2396 		case IMSG_IFINFO:
2397 			if (idx != PFD_PIPE_MAIN)
2398 				fatalx("IFINFO message not from parent");
2399 			if (imsg.hdr.len != IMSG_HEADER_SIZE +
2400 			    sizeof(struct kif))
2401 				fatalx("IFINFO imsg with wrong len");
2402 			kif = imsg.data;
2403 			depend_ok = (kif->flags & IFF_UP) &&
2404 			    (LINK_STATE_IS_UP(kif->link_state) ||
2405 			    (kif->link_state == LINK_STATE_UNKNOWN &&
2406 			    kif->media_type != IFT_CARP));
2407 
2408 			for (p = peers; p != NULL; p = p->next)
2409 				if (!strcmp(p->conf.if_depend, kif->ifname)) {
2410 					if (depend_ok && !p->depend_ok) {
2411 						p->depend_ok = depend_ok;
2412 						bgp_fsm(p, EVNT_START);
2413 					} else if (!depend_ok && p->depend_ok) {
2414 						p->depend_ok = depend_ok;
2415 						bgp_fsm(p, EVNT_STOP);
2416 					}
2417 				}
2418 			break;
2419 		case IMSG_MRT_OPEN:
2420 		case IMSG_MRT_REOPEN:
2421 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2422 			    sizeof(struct mrt)) {
2423 				log_warnx("wrong imsg len");
2424 				break;
2425 			}
2426 
2427 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
2428 			if ((xmrt.fd = imsg_get_fd(ibuf)) == -1)
2429 				log_warnx("expected to receive fd for mrt dump "
2430 				    "but didn't receive any");
2431 
2432 			mrt = mrt_get(&mrthead, &xmrt);
2433 			if (mrt == NULL) {
2434 				/* new dump */
2435 				mrt = calloc(1, sizeof(struct mrt));
2436 				if (mrt == NULL)
2437 					fatal("session_dispatch_imsg");
2438 				memcpy(mrt, &xmrt, sizeof(struct mrt));
2439 				TAILQ_INIT(&mrt->bufs);
2440 				LIST_INSERT_HEAD(&mrthead, mrt, entry);
2441 			} else {
2442 				/* old dump reopened */
2443 				close(mrt->fd);
2444 				mrt->fd = xmrt.fd;
2445 			}
2446 			break;
2447 		case IMSG_MRT_CLOSE:
2448 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2449 			    sizeof(struct mrt)) {
2450 				log_warnx("wrong imsg len");
2451 				break;
2452 			}
2453 
2454 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
2455 			mrt = mrt_get(&mrthead, &xmrt);
2456 			if (mrt != NULL) {
2457 				mrt_clean(mrt);
2458 				LIST_REMOVE(mrt, entry);
2459 				free(mrt);
2460 			}
2461 			break;
2462 		case IMSG_CTL_KROUTE:
2463 		case IMSG_CTL_KROUTE6:
2464 		case IMSG_CTL_KROUTE_ADDR:
2465 		case IMSG_CTL_SHOW_NEXTHOP:
2466 		case IMSG_CTL_SHOW_INTERFACE:
2467 			if (idx != PFD_PIPE_MAIN)
2468 				fatalx("ctl kroute request not from parent");
2469 			control_imsg_relay(&imsg);
2470 			break;
2471 		case IMSG_CTL_SHOW_RIB:
2472 		case IMSG_CTL_SHOW_RIB_PREFIX:
2473 		case IMSG_CTL_SHOW_RIB_ATTR:
2474 		case IMSG_CTL_SHOW_RIB_MEM:
2475 		case IMSG_CTL_SHOW_NETWORK:
2476 		case IMSG_CTL_SHOW_NETWORK6:
2477 		case IMSG_CTL_SHOW_NEIGHBOR:
2478 			if (idx != PFD_PIPE_ROUTE_CTL)
2479 				fatalx("ctl rib request not from RDE");
2480 			control_imsg_relay(&imsg);
2481 			break;
2482 		case IMSG_CTL_END:
2483 		case IMSG_CTL_RESULT:
2484 			control_imsg_relay(&imsg);
2485 			break;
2486 		case IMSG_UPDATE:
2487 			if (idx != PFD_PIPE_ROUTE)
2488 				fatalx("update request not from RDE");
2489 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2490 			    MAX_PKTSIZE - MSGSIZE_HEADER ||
2491 			    imsg.hdr.len < IMSG_HEADER_SIZE +
2492 			    MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER)
2493 				log_warnx("RDE sent invalid update");
2494 			else
2495 				session_update(imsg.hdr.peerid, imsg.data,
2496 				    imsg.hdr.len - IMSG_HEADER_SIZE);
2497 			break;
2498 		case IMSG_UPDATE_ERR:
2499 			if (idx != PFD_PIPE_ROUTE)
2500 				fatalx("update request not from RDE");
2501 			if (imsg.hdr.len < IMSG_HEADER_SIZE + 2) {
2502 				log_warnx("RDE sent invalid notification");
2503 				break;
2504 			}
2505 			if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) {
2506 				log_warnx("no such peer: id=%u",
2507 				    imsg.hdr.peerid);
2508 				break;
2509 			}
2510 			data = imsg.data;
2511 			errcode = *data++;
2512 			subcode = *data++;
2513 
2514 			if (imsg.hdr.len == IMSG_HEADER_SIZE + 2)
2515 				data = NULL;
2516 
2517 			session_notification(p, errcode, subcode,
2518 			    data, imsg.hdr.len - IMSG_HEADER_SIZE - 2);
2519 			switch (errcode) {
2520 			case ERR_CEASE:
2521 				switch (subcode) {
2522 				case ERR_CEASE_MAX_PREFIX:
2523 					bgp_fsm(p, EVNT_STOP);
2524 					if (p->conf.max_prefix_restart)
2525 						timer_set(p, Timer_IdleHold, 60 *
2526 						    p->conf.max_prefix_restart);
2527 					break;
2528 				default:
2529 					bgp_fsm(p, EVNT_CON_FATAL);
2530 					break;
2531 				}
2532 				break;
2533 			default:
2534 				bgp_fsm(p, EVNT_CON_FATAL);
2535 				break;
2536 			}
2537 			break;
2538 		default:
2539 			break;
2540 		}
2541 		imsg_free(&imsg);
2542 	}
2543 }
2544 
2545 int
2546 la_cmp(struct listen_addr *a, struct listen_addr *b)
2547 {
2548 	struct sockaddr_in	*in_a, *in_b;
2549 	struct sockaddr_in6	*in6_a, *in6_b;
2550 
2551 	if (a->sa.ss_family != b->sa.ss_family)
2552 		return (1);
2553 
2554 	switch (a->sa.ss_family) {
2555 	case AF_INET:
2556 		in_a = (struct sockaddr_in *)&a->sa;
2557 		in_b = (struct sockaddr_in *)&b->sa;
2558 		if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr)
2559 			return (1);
2560 		if (in_a->sin_port != in_b->sin_port)
2561 			return (1);
2562 		break;
2563 	case AF_INET6:
2564 		in6_a = (struct sockaddr_in6 *)&a->sa;
2565 		in6_b = (struct sockaddr_in6 *)&b->sa;
2566 		if (bcmp(&in6_a->sin6_addr, &in6_b->sin6_addr,
2567 		    sizeof(struct in6_addr)))
2568 			return (1);
2569 		if (in6_a->sin6_port != in6_b->sin6_port)
2570 			return (1);
2571 		break;
2572 	default:
2573 		fatal("king bula sez: unknown address family");
2574 		/* NOTREACHED */
2575 	}
2576 
2577 	return (0);
2578 }
2579 
2580 struct peer *
2581 getpeerbyaddr(struct bgpd_addr *addr)
2582 {
2583 	struct peer *p;
2584 
2585 	/* we might want a more effective way to find peers by IP */
2586 	for (p = peers; p != NULL &&
2587 	    memcmp(&p->conf.remote_addr, addr, sizeof(p->conf.remote_addr));
2588 	    p = p->next)
2589 		;	/* nothing */
2590 
2591 	return (p);
2592 }
2593 
2594 struct peer *
2595 getpeerbydesc(const char *descr)
2596 {
2597 	struct peer	*p, *res = NULL;
2598 	int		 match = 0;
2599 
2600 	for (p = peers; p != NULL; p = p->next)
2601 		if (!strcmp(p->conf.descr, descr)) {
2602 			res = p;
2603 			match++;
2604 		}
2605 
2606 	if (match > 1)
2607 		log_info("neighbor description \"%s\" not unique, request "
2608 		    "aborted", descr);
2609 
2610 	if (match == 1)
2611 		return (res);
2612 	else
2613 		return (NULL);
2614 }
2615 
2616 struct peer *
2617 getpeerbyip(struct sockaddr *ip)
2618 {
2619 	struct peer	*p, *newpeer, *loose = NULL;
2620 	u_int32_t	 id;
2621 
2622 	/* we might want a more effective way to find peers by IP */
2623 	for (p = peers; p != NULL; p = p->next)
2624 		if (!p->conf.template &&
2625 		    p->conf.remote_addr.af == ip->sa_family) {
2626 			if (p->conf.remote_addr.af == AF_INET &&
2627 			    p->conf.remote_addr.v4.s_addr ==
2628 			    ((struct sockaddr_in *)ip)->sin_addr.s_addr)
2629 				return (p);
2630 			if (p->conf.remote_addr.af == AF_INET6 &&
2631 			    !bcmp(&p->conf.remote_addr.v6,
2632 			    &((struct sockaddr_in6 *)ip)->sin6_addr,
2633 			    sizeof(p->conf.remote_addr.v6)))
2634 				return (p);
2635 		}
2636 
2637 	/* try template matching */
2638 	for (p = peers; p != NULL; p = p->next)
2639 		if (p->conf.template &&
2640 		    p->conf.remote_addr.af == ip->sa_family &&
2641 		    session_match_mask(p, ip))
2642 			if (loose == NULL || loose->conf.remote_masklen <
2643 			    p->conf.remote_masklen)
2644 				loose = p;
2645 
2646 	if (loose != NULL) {
2647 		/* clone */
2648 		if ((newpeer = malloc(sizeof(struct peer))) == NULL)
2649 			fatal(NULL);
2650 		memcpy(newpeer, loose, sizeof(struct peer));
2651 		for (id = UINT_MAX; id > UINT_MAX / 2; id--) {
2652 			for (p = peers; p != NULL && p->conf.id != id;
2653 			    p = p->next)
2654 				;	/* nothing */
2655 			if (p == NULL) {	/* we found a free id */
2656 				newpeer->conf.id = id;
2657 				break;
2658 			}
2659 		}
2660 		if (newpeer->conf.remote_addr.af == AF_INET) {
2661 			newpeer->conf.remote_addr.v4.s_addr =
2662 			    ((struct sockaddr_in *)ip)->sin_addr.s_addr;
2663 			newpeer->conf.remote_masklen = 32;
2664 		}
2665 		if (newpeer->conf.remote_addr.af == AF_INET6) {
2666 			memcpy(&p->conf.remote_addr.v6,
2667 			    &((struct sockaddr_in6 *)ip)->sin6_addr,
2668 			    sizeof(newpeer->conf.remote_addr.v6));
2669 			newpeer->conf.remote_masklen = 128;
2670 		}
2671 		newpeer->conf.template = 0;
2672 		newpeer->conf.cloned = 1;
2673 		newpeer->state = newpeer->prev_state = STATE_NONE;
2674 		newpeer->conf.reconf_action = RECONF_REINIT;
2675 		newpeer->rbuf = NULL;
2676 		init_peer(newpeer);
2677 		bgp_fsm(newpeer, EVNT_START);
2678 		newpeer->next = peers;
2679 		peers = newpeer;
2680 		return (newpeer);
2681 	}
2682 
2683 	return (NULL);
2684 }
2685 
2686 int
2687 session_match_mask(struct peer *p, struct sockaddr *ip)
2688 {
2689 	int		 i;
2690 	in_addr_t	 v4mask;
2691 	struct in6_addr	*in;
2692 	struct in6_addr	 mask;
2693 
2694 	if (p->conf.remote_addr.af == AF_INET) {
2695 		v4mask = htonl(prefixlen2mask(p->conf.remote_masklen));
2696 		if (p->conf.remote_addr.v4.s_addr ==
2697 		    ((((struct sockaddr_in *)ip)->sin_addr.s_addr) & v4mask))
2698 			return (1);
2699 		else
2700 			return (0);
2701 	}
2702 
2703 	if (p->conf.remote_addr.af == AF_INET6) {
2704 		bzero(&mask, sizeof(mask));
2705 		for (i = 0; i < p->conf.remote_masklen / 8; i++)
2706 			mask.s6_addr[i] = 0xff;
2707 		i = p->conf.remote_masklen % 8;
2708 		if (i)
2709 			mask.s6_addr[p->conf.remote_masklen / 8] = 0xff00 >> i;
2710 
2711 		in = &((struct sockaddr_in6 *)ip)->sin6_addr;
2712 
2713 		for (i = 0; i < 16; i++)
2714 			if ((in->s6_addr[i] & mask.s6_addr[i]) !=
2715 			    p->conf.remote_addr.addr8[i])
2716 				return (0);
2717 
2718 		return (1);
2719 	}
2720 
2721 	return (0);
2722 }
2723 
2724 struct peer *
2725 getpeerbyid(u_int32_t peerid)
2726 {
2727 	struct peer *p;
2728 
2729 	/* we might want a more effective way to find peers by IP */
2730 	for (p = peers; p != NULL &&
2731 	    p->conf.id != peerid; p = p->next)
2732 		;	/* nothing */
2733 
2734 	return (p);
2735 }
2736 
2737 void
2738 session_down(struct peer *peer)
2739 {
2740 	peer->stats.last_updown = time(NULL);
2741 	if (imsg_compose(ibuf_rde, IMSG_SESSION_DOWN, peer->conf.id, 0, -1,
2742 	    NULL, 0) == -1)
2743 		fatalx("imsg_compose error");
2744 }
2745 
2746 void
2747 session_up(struct peer *p)
2748 {
2749 	struct session_up	 sup;
2750 
2751 	if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, p->conf.id, 0, -1,
2752 	    &p->conf, sizeof(p->conf)) == -1)
2753 		fatalx("imsg_compose error");
2754 
2755 	switch (p->sa_local.ss_family) {
2756 	case AF_INET:
2757 		sup.local_addr.af = AF_INET;
2758 		memcpy(&sup.local_addr.v4,
2759 		    &((struct sockaddr_in *)&p->sa_local)->sin_addr,
2760 		    sizeof(sup.local_addr.v4));
2761 		sup.remote_addr.af = AF_INET;
2762 		memcpy(&sup.remote_addr.v4,
2763 		    &((struct sockaddr_in *)&p->sa_remote)->sin_addr,
2764 		    sizeof(sup.remote_addr.v4));
2765 		break;
2766 	case AF_INET6:
2767 		sup.local_addr.af = AF_INET6;
2768 		memcpy(&sup.local_addr.v6,
2769 		    &((struct sockaddr_in6 *)&p->sa_local)->sin6_addr,
2770 		    sizeof(sup.local_addr.v6));
2771 		sup.remote_addr.af = AF_INET6;
2772 		memcpy(&sup.remote_addr.v6,
2773 		    &((struct sockaddr_in6 *)&p->sa_remote)->sin6_addr,
2774 		    sizeof(sup.remote_addr.v6));
2775 		break;
2776 	default:
2777 		fatalx("session_up: unsupported address family");
2778 	}
2779 
2780 	sup.remote_bgpid = p->remote_bgpid;
2781 	sup.short_as = p->short_as;
2782 	memcpy(&sup.capa_announced, &p->capa.ann, sizeof(sup.capa_announced));
2783 	memcpy(&sup.capa_received, &p->capa.peer, sizeof(sup.capa_received));
2784 	p->stats.last_updown = time(NULL);
2785 	if (imsg_compose(ibuf_rde, IMSG_SESSION_UP, p->conf.id, 0, -1,
2786 	    &sup, sizeof(sup)) == -1)
2787 		fatalx("imsg_compose error");
2788 }
2789 
2790 int
2791 imsg_compose_parent(int type, pid_t pid, void *data, u_int16_t datalen)
2792 {
2793 	return (imsg_compose(ibuf_main, type, 0, pid, -1, data, datalen));
2794 }
2795 
2796 int
2797 imsg_compose_rde(int type, pid_t pid, void *data, u_int16_t datalen)
2798 {
2799 	return (imsg_compose(ibuf_rde, type, 0, pid, -1, data, datalen));
2800 }
2801 
2802 static struct sockaddr *
2803 addr2sa(struct bgpd_addr *addr, u_int16_t port)
2804 {
2805 	static struct sockaddr_storage	 ss;
2806 	struct sockaddr_in		*sa_in = (struct sockaddr_in *)&ss;
2807 	struct sockaddr_in6		*sa_in6 = (struct sockaddr_in6 *)&ss;
2808 
2809 	bzero(&ss, sizeof(ss));
2810 	switch (addr->af) {
2811 	case AF_INET:
2812 		sa_in->sin_family = AF_INET;
2813 		sa_in->sin_len = sizeof(struct sockaddr_in);
2814 		sa_in->sin_addr.s_addr = addr->v4.s_addr;
2815 		sa_in->sin_port = htons(port);
2816 		break;
2817 	case AF_INET6:
2818 		sa_in6->sin6_family = AF_INET6;
2819 		sa_in6->sin6_len = sizeof(struct sockaddr_in6);
2820 		memcpy(&sa_in6->sin6_addr, &addr->v6,
2821 		    sizeof(sa_in6->sin6_addr));
2822 		sa_in6->sin6_port = htons(port);
2823 		sa_in6->sin6_scope_id = addr->scope_id;
2824 		break;
2825 	}
2826 
2827 	return ((struct sockaddr *)&ss);
2828 }
2829 
2830 void
2831 session_demote(struct peer *p, int level)
2832 {
2833 	struct demote_msg	msg;
2834 
2835 	strlcpy(msg.demote_group, p->conf.demote_group,
2836 	    sizeof(msg.demote_group));
2837 	msg.level = level;
2838 	if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1,
2839 	    &msg, sizeof(msg)) == -1)
2840 		fatalx("imsg_compose error");
2841 
2842 	p->demoted += level;
2843 }
2844