xref: /openbsd-src/usr.sbin/bgpd/session.c (revision 5054e3e78af0749a9bb00ba9a024b3ee2d90290f)
1 /*	$OpenBSD: session.c,v 1.299 2009/10/26 09:27:58 claudio Exp $ */
2 
3 /*
4  * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/types.h>
21 
22 #include <sys/mman.h>
23 #include <sys/socket.h>
24 #include <sys/un.h>
25 #include <net/if_types.h>
26 #include <netinet/in.h>
27 #include <netinet/in_systm.h>
28 #include <netinet/ip.h>
29 #include <netinet/tcp.h>
30 #include <arpa/inet.h>
31 
32 #include <err.h>
33 #include <errno.h>
34 #include <fcntl.h>
35 #include <limits.h>
36 #include <poll.h>
37 #include <pwd.h>
38 #include <signal.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <unistd.h>
43 
44 #include "bgpd.h"
45 #include "mrt.h"
46 #include "session.h"
47 
48 #define PFD_PIPE_MAIN		0	/* pfd[] slot of ibuf_main */
49 #define PFD_PIPE_ROUTE		1	/* pfd[] slot of ibuf_rde */
50 #define PFD_PIPE_ROUTE_CTL	2	/* pfd[] slot of ibuf_rde_ctl */
51 #define PFD_SOCK_CTL		3	/* pfd[] slot of csock */
52 #define PFD_SOCK_RCTL		4	/* pfd[] slot of rcsock */
53 #define PFD_LISTENERS_START	5	/* first listener slot in pfd[] */
54 
55 void	session_sighdlr(int);
56 int	setup_listeners(u_int *);
57 void	init_conf(struct bgpd_config *);
58 void	init_peer(struct peer *);
59 void	start_timer_holdtime(struct peer *);
60 void	start_timer_keepalive(struct peer *);
61 void	session_close_connection(struct peer *);
62 void	change_state(struct peer *, enum session_state, enum session_events);
63 int	session_setup_socket(struct peer *);
64 void	session_accept(int);
65 int	session_connect(struct peer *);
66 void	session_tcp_established(struct peer *);
67 void	session_capa_ann_none(struct peer *);
68 int	session_capa_add(struct buf *, u_int8_t, u_int8_t);
69 int	session_capa_add_mp(struct buf *, u_int16_t, u_int8_t);
70 struct bgp_msg	*session_newmsg(enum msg_type, u_int16_t);
71 int	session_sendmsg(struct bgp_msg *, struct peer *);
72 void	session_open(struct peer *);
73 void	session_keepalive(struct peer *);
74 void	session_update(u_int32_t, void *, size_t);
75 void	session_notification(struct peer *, u_int8_t, u_int8_t, void *,
76 	    ssize_t);
77 void	session_rrefresh(struct peer *, u_int16_t, u_int8_t);
78 int	session_dispatch_msg(struct pollfd *, struct peer *);
79 int	parse_header(struct peer *, u_char *, u_int16_t *, u_int8_t *);
80 int	parse_open(struct peer *);
81 int	parse_update(struct peer *);
82 int	parse_refresh(struct peer *);
83 int	parse_notification(struct peer *);
84 int	parse_capabilities(struct peer *, u_char *, u_int16_t, u_int32_t *);
85 void	session_dispatch_imsg(struct imsgbuf *, int, u_int *);
86 void	session_up(struct peer *);
87 void	session_down(struct peer *);
88 void	session_demote(struct peer *, int);
89 
90 int			 la_cmp(struct listen_addr *, struct listen_addr *);
91 struct peer		*getpeerbyip(struct sockaddr *);
92 int			 session_match_mask(struct peer *, struct sockaddr *);
93 struct peer		*getpeerbyid(u_int32_t);
94 static struct sockaddr	*addr2sa(struct bgpd_addr *, u_int16_t);
95 
96 struct bgpd_config	*conf, *nconf = NULL;	/* conf is set in session_main(); nconf presumably holds a config being reloaded -- TODO confirm */
97 struct bgpd_sysdep	 sysdep;	/* no_md5sig is set in setup_listeners() */
98 struct peer		*npeers;	/* presumably peers of a pending reconfiguration -- TODO confirm */
99 volatile sig_atomic_t	 session_quit = 0;	/* raised by session_sighdlr(); ends the main loop */
100 int			 pending_reconf = 0;	/* nonzero: main loop skips peer init/reinit/delete */
101 int			 csock = -1, rcsock = -1;	/* control sockets from control_init() */
102 u_int			 peer_cnt;	/* ++ in init_peer(), -- when a peer is deleted */
103 struct imsgbuf		*ibuf_rde;	/* imsg buffer on pipe_s2r[0] */
104 struct imsgbuf		*ibuf_rde_ctl;	/* imsg buffer on pipe_s2rctl[0] */
105 struct imsgbuf		*ibuf_main;	/* imsg buffer on pipe_m2s[1] */
106 
107 struct mrt_head		 mrthead;	/* session engine's own list of mrt dumps */
108 
109 void
110 session_sighdlr(int sig)
111 {
112 	switch (sig) {
113 	case SIGINT:
114 	case SIGTERM:
115 		session_quit = 1;
116 		break;
117 	}
118 }
119 
120 int
121 setup_listeners(u_int *la_cnt)
122 {
123 	int			 ttl = 255;
124 	int			 opt;
125 	struct listen_addr	*la;
126 	u_int			 cnt = 0;
127 
128 	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
129 		la->reconf = RECONF_NONE;
130 		cnt++;
131 
132 		if (la->flags & LISTENER_LISTENING)
133 			continue;
134 
135 		if (la->fd == -1) {
136 			log_warn("cannot establish listener on %s: invalid fd",
137 			    log_sockaddr((struct sockaddr *)&la->sa));
138 			continue;
139 		}
140 
141 		opt = 1;
142 		if (setsockopt(la->fd, IPPROTO_TCP, TCP_MD5SIG,
143 		    &opt, sizeof(opt)) == -1) {
144 			if (errno == ENOPROTOOPT) {	/* system w/o md5sig */
145 				log_warnx("md5sig not available, disabling");
146 				sysdep.no_md5sig = 1;
147 			} else
148 				fatal("setsockopt TCP_MD5SIG");
149 		}
150 
151 		/* set ttl to 255 so that ttl-security works */
152 		if (la->sa.ss_family == AF_INET && setsockopt(la->fd,
153 		    IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) {
154 			log_warn("setup_listeners setsockopt TTL");
155 			continue;
156 		}
157 
158 		session_socket_blockmode(la->fd, BM_NONBLOCK);
159 
160 		if (listen(la->fd, MAX_BACKLOG)) {
161 			close(la->fd);
162 			fatal("listen");
163 		}
164 
165 		la->flags |= LISTENER_LISTENING;
166 
167 		log_info("listening on %s",
168 		    log_sockaddr((struct sockaddr *)&la->sa));
169 	}
170 
171 	*la_cnt = cnt;
172 
173 	return (0);
174 }
175 
176 pid_t
177 session_main(struct bgpd_config *config, struct peer *cpeers,
178     struct network_head *net_l, struct filter_head *rules,
179     struct mrt_head *m_l, struct rib_names *rib_l, int pipe_m2s[2],
180     int pipe_s2r[2], int pipe_m2r[2], int pipe_s2rctl[2])
181 {
182 	int			 nfds, timeout;
183 	unsigned int		 i, j, idx_peers, idx_listeners, idx_mrts;
184 	pid_t			 pid;
185 	u_int			 pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0;
186 	u_int			 listener_cnt, ctl_cnt, mrt_cnt;
187 	u_int			 new_cnt;
188 	u_int32_t		 ctl_queued;
189 	struct passwd		*pw;
190 	struct peer		*p, **peer_l = NULL, *last, *next;
191 	struct network		*net;
192 	struct mrt		*m, *xm, **mrt_l = NULL;
193 	struct filter_rule	*r;
194 	struct pollfd		*pfd = NULL;
195 	struct ctl_conn		*ctl_conn;
196 	struct listen_addr	*la;
197 	struct rde_rib		*rr;
198 	void			*newp;
199 	short			 events;
200 
201 	conf = config;
202 	peers = cpeers;
203 
204 	switch (pid = fork()) {
205 	case -1:
206 		fatal("cannot fork");
207 	case 0:
208 		break;
209 	default:
210 		return (pid);
211 	}
212 
213 	/* control socket is outside chroot */
214 	if ((csock = control_init(0, conf->csock)) == -1)
215 		fatalx("control socket setup failed");
216 	if (conf->rcsock != NULL &&
217 	    (rcsock = control_init(1, conf->rcsock)) == -1)
218 		fatalx("control socket setup failed");
219 
220 	if ((pw = getpwnam(BGPD_USER)) == NULL)
221 		fatal(NULL);
222 
223 	if (chroot(pw->pw_dir) == -1)
224 		fatal("chroot");
225 	if (chdir("/") == -1)
226 		fatal("chdir(\"/\")");
227 
228 	setproctitle("session engine");
229 	bgpd_process = PROC_SE;
230 
231 	if (pfkey_init(&sysdep) == -1)
232 		fatalx("pfkey setup failed");
233 
234 	if (setgroups(1, &pw->pw_gid) ||
235 	    setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) ||
236 	    setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
237 		fatal("can't drop privileges");
238 
239 	listener_cnt = 0;
240 	setup_listeners(&listener_cnt);
241 
242 	signal(SIGTERM, session_sighdlr);
243 	signal(SIGINT, session_sighdlr);
244 	signal(SIGPIPE, SIG_IGN);
245 	signal(SIGHUP, SIG_IGN);
246 	log_info("session engine ready");
247 	close(pipe_m2s[0]);
248 	close(pipe_s2r[1]);
249 	close(pipe_s2rctl[1]);
250 	close(pipe_m2r[0]);
251 	close(pipe_m2r[1]);
252 	init_conf(conf);
253 	if ((ibuf_rde = malloc(sizeof(struct imsgbuf))) == NULL ||
254 	    (ibuf_rde_ctl = malloc(sizeof(struct imsgbuf))) == NULL ||
255 	    (ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL)
256 		fatal(NULL);
257 	imsg_init(ibuf_rde, pipe_s2r[0]);
258 	imsg_init(ibuf_rde_ctl, pipe_s2rctl[0]);
259 	imsg_init(ibuf_main, pipe_m2s[1]);
260 	TAILQ_INIT(&ctl_conns);
261 	control_listen(csock);
262 	control_listen(rcsock);
263 	LIST_INIT(&mrthead);
264 	peer_cnt = 0;
265 	ctl_cnt = 0;
266 
267 	/* filter rules are not used in the SE */
268 	while ((r = TAILQ_FIRST(rules)) != NULL) {
269 		TAILQ_REMOVE(rules, r, entry);
270 		free(r);
271 	}
272 	free(rules);
273 
274 	/* network list is not used in the SE */
275 	while ((net = TAILQ_FIRST(net_l)) != NULL) {
276 		TAILQ_REMOVE(net_l, net, entry);
277 		filterset_free(&net->net.attrset);
278 		free(net);
279 	}
280 
281 	/* main mrt list is not used in the SE */
282 	while ((m = LIST_FIRST(m_l)) != NULL) {
283 		LIST_REMOVE(m, entry);
284 		free(m);
285 	}
286 	/* rib names not used in the SE */
287 	while ((rr = SIMPLEQ_FIRST(&ribnames))) {
288 		SIMPLEQ_REMOVE_HEAD(&ribnames, entry);
289 		free(rr);
290 	}
291 
292 	while (session_quit == 0) {
293 		/* check for peers to be initialized or deleted */
294 		last = NULL;
295 		for (p = peers; p != NULL; p = next) {
296 			next = p->next;
297 			if (!pending_reconf) {
298 				/* cloned peer that idled out? */
299 				if (p->state == STATE_IDLE && p->conf.cloned &&
300 				    time(NULL) - p->stats.last_updown >=
301 				    INTERVAL_HOLD_CLONED)
302 					p->conf.reconf_action = RECONF_DELETE;
303 
304 				/* new peer that needs init? */
305 				if (p->state == STATE_NONE)
306 					init_peer(p);
307 
308 				/* reinit due? */
309 				if (p->conf.reconf_action == RECONF_REINIT) {
310 					session_stop(p, ERR_CEASE_ADMIN_RESET);
311 					timer_set(p, Timer_IdleHold, 0);
312 				}
313 
314 				/* deletion due? */
315 				if (p->conf.reconf_action == RECONF_DELETE) {
316 					if (p->demoted)
317 						session_demote(p, -1);
318 					p->conf.demote_group[0] = 0;
319 					session_stop(p, ERR_CEASE_PEER_UNCONF);
320 					log_peer_warnx(&p->conf, "removed");
321 					if (last != NULL)
322 						last->next = next;
323 					else
324 						peers = next;
325 					timer_remove_all(p);
326 					free(p);
327 					peer_cnt--;
328 					continue;
329 				}
330 				p->conf.reconf_action = RECONF_NONE;
331 			}
332 			last = p;
333 		}
334 
335 		if (peer_cnt > peer_l_elms) {
336 			if ((newp = realloc(peer_l, sizeof(struct peer *) *
337 			    peer_cnt)) == NULL) {
338 				/* panic for now  */
339 				log_warn("could not resize peer_l from %u -> %u"
340 				    " entries", peer_l_elms, peer_cnt);
341 				fatalx("exiting");
342 			}
343 			peer_l = newp;
344 			peer_l_elms = peer_cnt;
345 		}
346 
347 		mrt_cnt = 0;
348 		for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) {
349 			xm = LIST_NEXT(m, entry);
350 			if (m->state == MRT_STATE_REMOVE) {
351 				mrt_clean(m);
352 				LIST_REMOVE(m, entry);
353 				free(m);
354 				continue;
355 			}
356 			if (m->wbuf.queued)
357 				mrt_cnt++;
358 		}
359 
360 		if (mrt_cnt > mrt_l_elms) {
361 			if ((newp = realloc(mrt_l, sizeof(struct mrt *) *
362 			    mrt_cnt)) == NULL) {
363 				/* panic for now  */
364 				log_warn("could not resize mrt_l from %u -> %u"
365 				    " entries", mrt_l_elms, mrt_cnt);
366 				fatalx("exiting");
367 			}
368 			mrt_l = newp;
369 			mrt_l_elms = mrt_cnt;
370 		}
371 
372 		new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt +
373 		    ctl_cnt + mrt_cnt;
374 		if (new_cnt > pfd_elms) {
375 			if ((newp = realloc(pfd, sizeof(struct pollfd) *
376 			    new_cnt)) == NULL) {
377 				/* panic for now  */
378 				log_warn("could not resize pfd from %u -> %u"
379 				    " entries", pfd_elms, new_cnt);
380 				fatalx("exiting");
381 			}
382 			pfd = newp;
383 			pfd_elms = new_cnt;
384 		}
385 
386 		bzero(pfd, sizeof(struct pollfd) * pfd_elms);
387 		pfd[PFD_PIPE_MAIN].fd = ibuf_main->fd;
388 		pfd[PFD_PIPE_MAIN].events = POLLIN;
389 		if (ibuf_main->w.queued > 0)
390 			pfd[PFD_PIPE_MAIN].events |= POLLOUT;
391 		pfd[PFD_PIPE_ROUTE].fd = ibuf_rde->fd;
392 		pfd[PFD_PIPE_ROUTE].events = POLLIN;
393 		if (ibuf_rde->w.queued > 0)
394 			pfd[PFD_PIPE_ROUTE].events |= POLLOUT;
395 
396 		ctl_queued = 0;
397 		TAILQ_FOREACH(ctl_conn, &ctl_conns, entry)
398 			ctl_queued += ctl_conn->ibuf.w.queued;
399 
400 		pfd[PFD_PIPE_ROUTE_CTL].fd = ibuf_rde_ctl->fd;
401 		if (ctl_queued < SESSION_CTL_QUEUE_MAX)
402 			/*
403 			 * Do not act as unlimited buffer. Don't read in more
404 			 * messages if the ctl sockets are getting full.
405 			 */
406 			pfd[PFD_PIPE_ROUTE_CTL].events = POLLIN;
407 		pfd[PFD_SOCK_CTL].fd = csock;
408 		pfd[PFD_SOCK_CTL].events = POLLIN;
409 		pfd[PFD_SOCK_RCTL].fd = rcsock;
410 		pfd[PFD_SOCK_RCTL].events = POLLIN;
411 
412 		i = PFD_LISTENERS_START;
413 		TAILQ_FOREACH(la, conf->listen_addrs, entry) {
414 			pfd[i].fd = la->fd;
415 			pfd[i].events = POLLIN;
416 			i++;
417 		}
418 		idx_listeners = i;
419 		timeout = 240;	/* loop every 240s at least */
420 
421 		for (p = peers; p != NULL; p = p->next) {
422 			time_t	nextaction;
423 			struct peer_timer *pt;
424 
425 			/* check timers */
426 			if ((pt = timer_nextisdue(p)) != NULL) {
427 				switch (pt->type) {
428 				case Timer_Hold:
429 					bgp_fsm(p, EVNT_TIMER_HOLDTIME);
430 					break;
431 				case Timer_ConnectRetry:
432 					bgp_fsm(p, EVNT_TIMER_CONNRETRY);
433 					break;
434 				case Timer_Keepalive:
435 					bgp_fsm(p, EVNT_TIMER_KEEPALIVE);
436 					break;
437 				case Timer_IdleHold:
438 					bgp_fsm(p, EVNT_START);
439 					break;
440 				case Timer_IdleHoldReset:
441 					p->IdleHoldTime /= 2;
442 					if (p->IdleHoldTime <=
443 					    INTERVAL_IDLE_HOLD_INITIAL) {
444 						p->IdleHoldTime =
445 						    INTERVAL_IDLE_HOLD_INITIAL;
446 						timer_stop(p,
447 						    Timer_IdleHoldReset);
448 						p->errcnt = 0;
449 					} else
450 						timer_set(p,
451 						    Timer_IdleHoldReset,
452 						    p->IdleHoldTime);
453 					break;
454 				case Timer_CarpUndemote:
455 					timer_stop(p, Timer_CarpUndemote);
456 					if (p->demoted &&
457 					    p->state == STATE_ESTABLISHED)
458 						session_demote(p, -1);
459 					break;
460 				default:
461 					fatalx("King Bula lost in time");
462 				}
463 			}
464 			if ((nextaction = timer_nextduein(p)) != -1 &&
465 			    nextaction < timeout)
466 				timeout = nextaction;
467 
468 			/* are we waiting for a write? */
469 			events = POLLIN;
470 			if (p->wbuf.queued > 0 || p->state == STATE_CONNECT)
471 				events |= POLLOUT;
472 
473 			/* poll events */
474 			if (p->fd != -1 && events != 0) {
475 				pfd[i].fd = p->fd;
476 				pfd[i].events = events;
477 				peer_l[i - idx_listeners] = p;
478 				i++;
479 			}
480 		}
481 
482 		idx_peers = i;
483 
484 		LIST_FOREACH(m, &mrthead, entry)
485 			if (m->wbuf.queued) {
486 				pfd[i].fd = m->wbuf.fd;
487 				pfd[i].events = POLLOUT;
488 				mrt_l[i - idx_peers] = m;
489 				i++;
490 			}
491 
492 		idx_mrts = i;
493 
494 		TAILQ_FOREACH(ctl_conn, &ctl_conns, entry) {
495 			pfd[i].fd = ctl_conn->ibuf.fd;
496 			pfd[i].events = POLLIN;
497 			if (ctl_conn->ibuf.w.queued > 0)
498 				pfd[i].events |= POLLOUT;
499 			i++;
500 		}
501 
502 		if (timeout < 0)
503 			timeout = 0;
504 		if ((nfds = poll(pfd, i, timeout * 1000)) == -1)
505 			if (errno != EINTR)
506 				fatal("poll error");
507 
508 		if (nfds > 0 && pfd[PFD_PIPE_MAIN].revents & POLLOUT)
509 			if (msgbuf_write(&ibuf_main->w) < 0)
510 				fatal("pipe write error");
511 
512 		if (nfds > 0 && pfd[PFD_PIPE_MAIN].revents & POLLIN) {
513 			nfds--;
514 			session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN,
515 			    &listener_cnt);
516 		}
517 
518 		if (nfds > 0 && pfd[PFD_PIPE_ROUTE].revents & POLLOUT)
519 			if (msgbuf_write(&ibuf_rde->w) < 0)
520 				fatal("pipe write error");
521 
522 		if (nfds > 0 && pfd[PFD_PIPE_ROUTE].revents & POLLIN) {
523 			nfds--;
524 			session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE,
525 			    &listener_cnt);
526 		}
527 
528 		if (nfds > 0 && pfd[PFD_PIPE_ROUTE_CTL].revents & POLLIN) {
529 			nfds--;
530 			session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL,
531 			    &listener_cnt);
532 		}
533 
534 		if (nfds > 0 && pfd[PFD_SOCK_CTL].revents & POLLIN) {
535 			nfds--;
536 			ctl_cnt += control_accept(csock, 0);
537 		}
538 
539 		if (nfds > 0 && pfd[PFD_SOCK_RCTL].revents & POLLIN) {
540 			nfds--;
541 			ctl_cnt += control_accept(rcsock, 1);
542 		}
543 
544 		for (j = PFD_LISTENERS_START; nfds > 0 && j < idx_listeners;
545 		    j++)
546 			if (pfd[j].revents & POLLIN) {
547 				nfds--;
548 				session_accept(pfd[j].fd);
549 			}
550 
551 		for (; nfds > 0 && j < idx_peers; j++)
552 			nfds -= session_dispatch_msg(&pfd[j],
553 			    peer_l[j - idx_listeners]);
554 
555 		for (; nfds > 0 && j < idx_mrts; j++)
556 			if (pfd[j].revents & POLLOUT) {
557 				nfds--;
558 				mrt_write(mrt_l[j - idx_peers]);
559 			}
560 
561 		for (; nfds > 0 && j < i; j++)
562 			nfds -= control_dispatch_msg(&pfd[j], &ctl_cnt);
563 	}
564 
565 	while ((p = peers) != NULL) {
566 		peers = p->next;
567 		session_stop(p, ERR_CEASE_ADMIN_DOWN);
568 		pfkey_remove(p);
569 		free(p);
570 	}
571 
572 	while ((m = LIST_FIRST(&mrthead)) != NULL) {
573 		mrt_clean(m);
574 		LIST_REMOVE(m, entry);
575 		free(m);
576 	}
577 
578 	while ((la = TAILQ_FIRST(conf->listen_addrs)) != NULL) {
579 		TAILQ_REMOVE(conf->listen_addrs, la, entry);
580 		free(la);
581 	}
582 	free(conf->listen_addrs);
583 	free(peer_l);
584 	free(mrt_l);
585 	free(pfd);
586 
587 	msgbuf_write(&ibuf_rde->w);
588 	msgbuf_clear(&ibuf_rde->w);
589 	free(ibuf_rde);
590 	msgbuf_write(&ibuf_main->w);
591 	msgbuf_clear(&ibuf_main->w);
592 	free(ibuf_main);
593 
594 	control_shutdown(csock);
595 	control_shutdown(rcsock);
596 	log_info("session engine exiting");
597 	_exit(0);
598 }
599 
600 void
601 init_conf(struct bgpd_config *c)
602 {
603 	if (!c->holdtime)
604 		c->holdtime = INTERVAL_HOLD;
605 	if (!c->connectretry)
606 		c->connectretry = INTERVAL_CONNECTRETRY;
607 }
608 
609 void
610 init_peer(struct peer *p)
611 {
612 	TAILQ_INIT(&p->timers);
613 	p->fd = p->wbuf.fd = -1;
614 
615 	if (p->conf.if_depend[0])
616 		imsg_compose(ibuf_main, IMSG_IFINFO, 0, 0, -1,
617 		    p->conf.if_depend, sizeof(p->conf.if_depend));
618 	else
619 		p->depend_ok = 1;
620 
621 	peer_cnt++;
622 
623 	change_state(p, STATE_IDLE, EVNT_NONE);
624 	if (p->conf.down)
625 		timer_stop(p, Timer_IdleHold);		/* no autostart */
626 	else
627 		timer_set(p, Timer_IdleHold, 0);	/* start ASAP */
628 
629 	/*
630 	 * on startup, demote if requested.
631 	 * do not handle new peers. they must reach ESTABLISHED beforehands.
632 	 * peers added at runtime have reconf_action set to RECONF_REINIT.
633 	 */
634 	if (p->conf.reconf_action != RECONF_REINIT && p->conf.demote_group[0])
635 		session_demote(p, +1);
636 }
637 
638 void
639 bgp_fsm(struct peer *peer, enum session_events event)
640 {
641 	switch (peer->state) {
642 	case STATE_NONE:
643 		/* nothing */
644 		break;
645 	case STATE_IDLE:
646 		switch (event) {
647 		case EVNT_START:
648 			timer_stop(peer, Timer_Hold);
649 			timer_stop(peer, Timer_Keepalive);
650 			timer_stop(peer, Timer_IdleHold);
651 
652 			/* allocate read buffer */
653 			peer->rbuf = calloc(1, sizeof(struct buf_read));
654 			if (peer->rbuf == NULL)
655 				fatal(NULL);
656 			peer->rbuf->wpos = 0;
657 
658 			/* init write buffer */
659 			msgbuf_init(&peer->wbuf);
660 
661 			/* init pfkey - remove old if any, load new ones */
662 			pfkey_remove(peer);
663 			if (pfkey_establish(peer) == -1) {
664 				log_peer_warnx(&peer->conf,
665 				    "pfkey setup failed");
666 				return;
667 			}
668 
669 			peer->stats.last_sent_errcode = 0;
670 			peer->stats.last_sent_suberr = 0;
671 
672 			if (!peer->depend_ok)
673 				timer_stop(peer, Timer_ConnectRetry);
674 			else if (peer->passive || peer->conf.passive ||
675 			    peer->conf.template) {
676 				change_state(peer, STATE_ACTIVE, event);
677 				timer_stop(peer, Timer_ConnectRetry);
678 			} else {
679 				change_state(peer, STATE_CONNECT, event);
680 				timer_set(peer, Timer_ConnectRetry,
681 				    conf->connectretry);
682 				session_connect(peer);
683 			}
684 			peer->passive = 0;
685 			break;
686 		default:
687 			/* ignore */
688 			break;
689 		}
690 		break;
691 	case STATE_CONNECT:
692 		switch (event) {
693 		case EVNT_START:
694 			/* ignore */
695 			break;
696 		case EVNT_CON_OPEN:
697 			session_tcp_established(peer);
698 			session_open(peer);
699 			timer_stop(peer, Timer_ConnectRetry);
700 			peer->holdtime = INTERVAL_HOLD_INITIAL;
701 			start_timer_holdtime(peer);
702 			change_state(peer, STATE_OPENSENT, event);
703 			break;
704 		case EVNT_CON_OPENFAIL:
705 			timer_set(peer, Timer_ConnectRetry,
706 			    conf->connectretry);
707 			session_close_connection(peer);
708 			change_state(peer, STATE_ACTIVE, event);
709 			break;
710 		case EVNT_TIMER_CONNRETRY:
711 			timer_set(peer, Timer_ConnectRetry,
712 			    conf->connectretry);
713 			session_connect(peer);
714 			break;
715 		default:
716 			change_state(peer, STATE_IDLE, event);
717 			break;
718 		}
719 		break;
720 	case STATE_ACTIVE:
721 		switch (event) {
722 		case EVNT_START:
723 			/* ignore */
724 			break;
725 		case EVNT_CON_OPEN:
726 			session_tcp_established(peer);
727 			session_open(peer);
728 			timer_stop(peer, Timer_ConnectRetry);
729 			peer->holdtime = INTERVAL_HOLD_INITIAL;
730 			start_timer_holdtime(peer);
731 			change_state(peer, STATE_OPENSENT, event);
732 			break;
733 		case EVNT_CON_OPENFAIL:
734 			timer_set(peer, Timer_ConnectRetry,
735 			    conf->connectretry);
736 			session_close_connection(peer);
737 			change_state(peer, STATE_ACTIVE, event);
738 			break;
739 		case EVNT_TIMER_CONNRETRY:
740 			timer_set(peer, Timer_ConnectRetry,
741 			    peer->holdtime);
742 			change_state(peer, STATE_CONNECT, event);
743 			session_connect(peer);
744 			break;
745 		default:
746 			change_state(peer, STATE_IDLE, event);
747 			break;
748 		}
749 		break;
750 	case STATE_OPENSENT:
751 		switch (event) {
752 		case EVNT_START:
753 			/* ignore */
754 			break;
755 		case EVNT_STOP:
756 			change_state(peer, STATE_IDLE, event);
757 			break;
758 		case EVNT_CON_CLOSED:
759 			session_close_connection(peer);
760 			timer_set(peer, Timer_ConnectRetry,
761 			    conf->connectretry);
762 			change_state(peer, STATE_ACTIVE, event);
763 			break;
764 		case EVNT_CON_FATAL:
765 			change_state(peer, STATE_IDLE, event);
766 			break;
767 		case EVNT_TIMER_HOLDTIME:
768 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
769 			    0, NULL, 0);
770 			change_state(peer, STATE_IDLE, event);
771 			break;
772 		case EVNT_RCVD_OPEN:
773 			/* parse_open calls change_state itself on failure */
774 			if (parse_open(peer))
775 				break;
776 			session_keepalive(peer);
777 			change_state(peer, STATE_OPENCONFIRM, event);
778 			break;
779 		case EVNT_RCVD_NOTIFICATION:
780 			if (parse_notification(peer)) {
781 				change_state(peer, STATE_IDLE, event);
782 				/* don't punish, capa negotiation */
783 				timer_set(peer, Timer_IdleHold, 0);
784 				peer->IdleHoldTime /= 2;
785 			} else
786 				change_state(peer, STATE_IDLE, event);
787 			break;
788 		default:
789 			session_notification(peer, ERR_FSM, 0, NULL, 0);
790 			change_state(peer, STATE_IDLE, event);
791 			break;
792 		}
793 		break;
794 	case STATE_OPENCONFIRM:
795 		switch (event) {
796 		case EVNT_START:
797 			/* ignore */
798 			break;
799 		case EVNT_STOP:
800 			change_state(peer, STATE_IDLE, event);
801 			break;
802 		case EVNT_CON_CLOSED:
803 		case EVNT_CON_FATAL:
804 			change_state(peer, STATE_IDLE, event);
805 			break;
806 		case EVNT_TIMER_HOLDTIME:
807 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
808 			    0, NULL, 0);
809 			change_state(peer, STATE_IDLE, event);
810 			break;
811 		case EVNT_TIMER_KEEPALIVE:
812 			session_keepalive(peer);
813 			break;
814 		case EVNT_RCVD_KEEPALIVE:
815 			start_timer_holdtime(peer);
816 			change_state(peer, STATE_ESTABLISHED, event);
817 			break;
818 		case EVNT_RCVD_NOTIFICATION:
819 			parse_notification(peer);
820 			change_state(peer, STATE_IDLE, event);
821 			break;
822 		default:
823 			session_notification(peer, ERR_FSM, 0, NULL, 0);
824 			change_state(peer, STATE_IDLE, event);
825 			break;
826 		}
827 		break;
828 	case STATE_ESTABLISHED:
829 		switch (event) {
830 		case EVNT_START:
831 			/* ignore */
832 			break;
833 		case EVNT_STOP:
834 			change_state(peer, STATE_IDLE, event);
835 			break;
836 		case EVNT_CON_CLOSED:
837 		case EVNT_CON_FATAL:
838 			change_state(peer, STATE_IDLE, event);
839 			break;
840 		case EVNT_TIMER_HOLDTIME:
841 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
842 			    0, NULL, 0);
843 			change_state(peer, STATE_IDLE, event);
844 			break;
845 		case EVNT_TIMER_KEEPALIVE:
846 			session_keepalive(peer);
847 			break;
848 		case EVNT_RCVD_KEEPALIVE:
849 			start_timer_holdtime(peer);
850 			break;
851 		case EVNT_RCVD_UPDATE:
852 			start_timer_holdtime(peer);
853 			if (parse_update(peer))
854 				change_state(peer, STATE_IDLE, event);
855 			else
856 				start_timer_holdtime(peer);
857 			break;
858 		case EVNT_RCVD_NOTIFICATION:
859 			parse_notification(peer);
860 			change_state(peer, STATE_IDLE, event);
861 			break;
862 		default:
863 			session_notification(peer, ERR_FSM, 0, NULL, 0);
864 			change_state(peer, STATE_IDLE, event);
865 			break;
866 		}
867 		break;
868 	}
869 }
870 
871 void
872 start_timer_holdtime(struct peer *peer)
873 {
874 	if (peer->holdtime > 0)
875 		timer_set(peer, Timer_Hold, peer->holdtime);
876 	else
877 		timer_stop(peer, Timer_Hold);
878 }
879 
880 void
881 start_timer_keepalive(struct peer *peer)
882 {
883 	if (peer->holdtime > 0)
884 		timer_set(peer, Timer_Keepalive, peer->holdtime / 3);
885 	else
886 		timer_stop(peer, Timer_Keepalive);
887 }
888 
889 void
890 session_close_connection(struct peer *peer)
891 {
892 	if (peer->fd != -1)
893 		close(peer->fd);
894 
895 	peer->fd = peer->wbuf.fd = -1;
896 }
897 
898 void
899 change_state(struct peer *peer, enum session_state state,
900     enum session_events event)
901 {
902 	struct mrt	*mrt;
903 
904 	switch (state) {
905 	case STATE_IDLE:
906 		/* carp demotion first. new peers handled in init_peer */
907 		if (peer->state == STATE_ESTABLISHED &&
908 		    peer->conf.demote_group[0] && !peer->demoted)
909 			session_demote(peer, +1);
910 
911 		/*
912 		 * try to write out what's buffered (maybe a notification),
913 		 * don't bother if it fails
914 		 */
915 		if (peer->state >= STATE_OPENSENT && peer->wbuf.queued)
916 			msgbuf_write(&peer->wbuf);
917 
918 		/*
919 		 * we must start the timer for the next EVNT_START
920 		 * if we are coming here due to an error and the
921 		 * session was not established successfully before, the
922 		 * starttimerinterval needs to be exponentially increased
923 		 */
924 		if (peer->IdleHoldTime == 0)
925 			peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL;
926 		peer->holdtime = INTERVAL_HOLD_INITIAL;
927 		timer_stop(peer, Timer_ConnectRetry);
928 		timer_stop(peer, Timer_Keepalive);
929 		timer_stop(peer, Timer_Hold);
930 		timer_stop(peer, Timer_IdleHoldReset);
931 		session_close_connection(peer);
932 		msgbuf_clear(&peer->wbuf);
933 		free(peer->rbuf);
934 		peer->rbuf = NULL;
935 		bzero(&peer->capa.peer, sizeof(peer->capa.peer));
936 		if (peer->state == STATE_ESTABLISHED)
937 			session_down(peer);
938 		if (event != EVNT_STOP) {
939 			timer_set(peer, Timer_IdleHold, peer->IdleHoldTime);
940 			if (event != EVNT_NONE &&
941 			    peer->IdleHoldTime < MAX_IDLE_HOLD/2)
942 				peer->IdleHoldTime *= 2;
943 		}
944 		if (peer->state == STATE_NONE ||
945 		    peer->state == STATE_ESTABLISHED) {
946 			/* initialize capability negotiation structures */
947 			memcpy(&peer->capa.ann, &peer->conf.capabilities,
948 			    sizeof(peer->capa.ann));
949 			if (!peer->conf.announce_capa)
950 				session_capa_ann_none(peer);
951 		}
952 		break;
953 	case STATE_CONNECT:
954 		break;
955 	case STATE_ACTIVE:
956 		break;
957 	case STATE_OPENSENT:
958 		break;
959 	case STATE_OPENCONFIRM:
960 		break;
961 	case STATE_ESTABLISHED:
962 		timer_set(peer, Timer_IdleHoldReset, peer->IdleHoldTime);
963 		if (peer->demoted)
964 			timer_set(peer, Timer_CarpUndemote,
965 			    INTERVAL_HOLD_DEMOTED);
966 		session_up(peer);
967 		break;
968 	default:		/* something seriously fucked */
969 		break;
970 	}
971 
972 	log_statechange(peer, state, event);
973 	LIST_FOREACH(mrt, &mrthead, entry) {
974 		if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT))
975 			continue;
976 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
977 		    mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
978 		    mrt->group_id == peer->conf.groupid))
979 			mrt_dump_state(mrt, peer->state, state, peer);
980 	}
981 	peer->prev_state = peer->state;
982 	peer->state = state;
983 }
984 
985 void
986 session_accept(int listenfd)
987 {
988 	int			 connfd;
989 	int			 opt;
990 	socklen_t		 len;
991 	struct sockaddr_storage	 cliaddr;
992 	struct peer		*p = NULL;
993 
994 	len = sizeof(cliaddr);
995 	if ((connfd = accept(listenfd,
996 	    (struct sockaddr *)&cliaddr, &len)) == -1) {
997 		if (errno == EWOULDBLOCK || errno == EINTR)
998 			return;
999 		else
1000 			log_warn("accept");
1001 	}
1002 
1003 	p = getpeerbyip((struct sockaddr *)&cliaddr);
1004 
1005 	if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) {
1006 		if (timer_running(p, Timer_IdleHold, NULL)) {
1007 			/* fast reconnect after clear */
1008 			p->passive = 1;
1009 			bgp_fsm(p, EVNT_START);
1010 		}
1011 	}
1012 
1013 	if (p != NULL &&
1014 	    (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) {
1015 		if (p->fd != -1) {
1016 			if (p->state == STATE_CONNECT)
1017 				session_close_connection(p);
1018 			else {
1019 				close(connfd);
1020 				return;
1021 			}
1022 		}
1023 
1024 		if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1025 			log_peer_warnx(&p->conf,
1026 			    "ipsec or md5sig configured but not available");
1027 			close(connfd);
1028 			return;
1029 		}
1030 
1031 		if (p->conf.auth.method == AUTH_MD5SIG) {
1032 			if (sysdep.no_md5sig) {
1033 				log_peer_warnx(&p->conf,
1034 				    "md5sig configured but not available");
1035 				close(connfd);
1036 				return;
1037 			}
1038 			len = sizeof(opt);
1039 			if (getsockopt(connfd, IPPROTO_TCP, TCP_MD5SIG,
1040 			    &opt, &len) == -1)
1041 				fatal("getsockopt TCP_MD5SIG");
1042 			if (!opt) {	/* non-md5'd connection! */
1043 				log_peer_warnx(&p->conf,
1044 				    "connection attempt without md5 signature");
1045 				close(connfd);
1046 				return;
1047 			}
1048 		}
1049 		p->fd = p->wbuf.fd = connfd;
1050 		if (session_setup_socket(p)) {
1051 			close(connfd);
1052 			return;
1053 		}
1054 		session_socket_blockmode(connfd, BM_NONBLOCK);
1055 		bgp_fsm(p, EVNT_CON_OPEN);
1056 	} else {
1057 		log_conn_attempt(p, (struct sockaddr *)&cliaddr);
1058 		close(connfd);
1059 	}
1060 }
1061 
1062 int
1063 session_connect(struct peer *peer)
1064 {
1065 	int			 opt = 1;
1066 	struct sockaddr		*sa;
1067 
1068 	/*
1069 	 * we do not need the overcomplicated collision detection RFC 1771
1070 	 * describes; we simply make sure there is only ever one concurrent
1071 	 * tcp connection per peer.
1072 	 */
1073 	if (peer->fd != -1)
1074 		return (-1);
1075 
1076 	if ((peer->fd = socket(peer->conf.remote_addr.af, SOCK_STREAM,
1077 	    IPPROTO_TCP)) == -1) {
1078 		log_peer_warn(&peer->conf, "session_connect socket");
1079 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1080 		return (-1);
1081 	}
1082 
1083 	if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1084 		log_peer_warnx(&peer->conf,
1085 		    "ipsec or md5sig configured but not available");
1086 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1087 		return (-1);
1088 	}
1089 
1090 	if (peer->conf.auth.method == AUTH_MD5SIG) {
1091 		if (sysdep.no_md5sig) {
1092 			log_peer_warnx(&peer->conf,
1093 			    "md5sig configured but not available");
1094 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1095 			return (-1);
1096 		}
1097 		if (setsockopt(peer->fd, IPPROTO_TCP, TCP_MD5SIG,
1098 		    &opt, sizeof(opt)) == -1) {
1099 			log_peer_warn(&peer->conf, "setsockopt md5sig");
1100 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1101 			return (-1);
1102 		}
1103 	}
1104 	peer->wbuf.fd = peer->fd;
1105 
1106 	/* if update source is set we need to bind() */
1107 	if (peer->conf.local_addr.af) {
1108 		sa = addr2sa(&peer->conf.local_addr, 0);
1109 		if (bind(peer->fd, sa, sa->sa_len) == -1) {
1110 			log_peer_warn(&peer->conf, "session_connect bind");
1111 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1112 			return (-1);
1113 		}
1114 	}
1115 
1116 	if (session_setup_socket(peer)) {
1117 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1118 		return (-1);
1119 	}
1120 
1121 	session_socket_blockmode(peer->fd, BM_NONBLOCK);
1122 
1123 	sa = addr2sa(&peer->conf.remote_addr, BGP_PORT);
1124 	if (connect(peer->fd, sa, sa->sa_len) == -1) {
1125 		if (errno != EINPROGRESS) {
1126 			if (errno != peer->lasterr)
1127 				log_peer_warn(&peer->conf, "connect");
1128 			peer->lasterr = errno;
1129 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1130 			return (-1);
1131 		}
1132 	} else
1133 		bgp_fsm(peer, EVNT_CON_OPEN);
1134 
1135 	return (0);
1136 }
1137 
1138 int
1139 session_setup_socket(struct peer *p)
1140 {
1141 	int	ttl = p->conf.distance;
1142 	int	pre = IPTOS_PREC_INTERNETCONTROL;
1143 	int	nodelay = 1;
1144 	int	bsize;
1145 
1146 	if (p->conf.ebgp && p->conf.remote_addr.af == AF_INET) {
1147 		/* set TTL to foreign router's distance - 1=direct n=multihop
1148 		   with ttlsec, we always use 255 */
1149 		if (p->conf.ttlsec) {
1150 			ttl = 256 - p->conf.distance;
1151 			if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL, &ttl,
1152 			    sizeof(ttl)) == -1) {
1153 				log_peer_warn(&p->conf,
1154 				    "session_setup_socket setsockopt MINTTL");
1155 				return (-1);
1156 			}
1157 			ttl = 255;
1158 		}
1159 
1160 		if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl,
1161 		    sizeof(ttl)) == -1) {
1162 			log_peer_warn(&p->conf,
1163 			    "session_setup_socket setsockopt TTL");
1164 			return (-1);
1165 		}
1166 	}
1167 
1168 	if (p->conf.ebgp && p->conf.remote_addr.af == AF_INET6)
1169 		/* set hoplimit to foreign router's distance */
1170 		if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl,
1171 		    sizeof(ttl)) == -1) {
1172 			log_peer_warn(&p->conf,
1173 			    "session_setup_socket setsockopt hoplimit");
1174 			return (-1);
1175 		}
1176 
1177 	/* if ttlsec is in use, set minttl */
1178 	if (p->conf.ttlsec) {
1179 		ttl = 256 - p->conf.distance;
1180 		setsockopt(p->fd, IPPROTO_IP, IP_MINTTL, &ttl, sizeof(ttl));
1181 
1182 	}
1183 
1184 	/* set TCP_NODELAY */
1185 	if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay,
1186 	    sizeof(nodelay)) == -1) {
1187 		log_peer_warn(&p->conf,
1188 		    "session_setup_socket setsockopt TCP_NODELAY");
1189 		return (-1);
1190 	}
1191 
1192 	/* set precedence, see RFC 1771 appendix 5 */
1193 	if (p->conf.remote_addr.af == AF_INET &&
1194 	    setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) == -1) {
1195 		log_peer_warn(&p->conf,
1196 		    "session_setup_socket setsockopt TOS");
1197 		return (-1);
1198 	}
1199 
1200 	/* only increase bufsize (and thus window) if md5 or ipsec is in use */
1201 	if (p->conf.auth.method != AUTH_NONE) {
1202 		/* try to increase bufsize. no biggie if it fails */
1203 		bsize = 65535;
1204 		while (setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize,
1205 		    sizeof(bsize)) == -1)
1206 			bsize /= 2;
1207 		bsize = 65535;
1208 		while (setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize,
1209 		    sizeof(bsize)) == -1)
1210 			bsize /= 2;
1211 	}
1212 
1213 	return (0);
1214 }
1215 
1216 void
1217 session_socket_blockmode(int fd, enum blockmodes bm)
1218 {
1219 	int	flags;
1220 
1221 	if ((flags = fcntl(fd, F_GETFL, 0)) == -1)
1222 		fatal("fcntl F_GETFL");
1223 
1224 	if (bm == BM_NONBLOCK)
1225 		flags |= O_NONBLOCK;
1226 	else
1227 		flags &= ~O_NONBLOCK;
1228 
1229 	if ((flags = fcntl(fd, F_SETFL, flags)) == -1)
1230 		fatal("fcntl F_SETFL");
1231 }
1232 
1233 void
1234 session_tcp_established(struct peer *peer)
1235 {
1236 	socklen_t	len;
1237 
1238 	len = sizeof(peer->sa_local);
1239 	if (getsockname(peer->fd, (struct sockaddr *)&peer->sa_local,
1240 	    &len) == -1)
1241 		log_warn("getsockname");
1242 	len = sizeof(peer->sa_remote);
1243 	if (getpeername(peer->fd, (struct sockaddr *)&peer->sa_remote,
1244 	    &len) == -1)
1245 		log_warn("getpeername");
1246 }
1247 
1248 void
1249 session_capa_ann_none(struct peer *peer)
1250 {
1251 	peer->capa.ann.mp_v4 = SAFI_NONE;
1252 	peer->capa.ann.mp_v4 = SAFI_NONE;
1253 	peer->capa.ann.refresh = 0;
1254 	peer->capa.ann.restart = 0;
1255 	peer->capa.ann.as4byte = 0;
1256 }
1257 
/*
 * Append a capability header (one code byte, one length byte) to opb.
 * Returns the sum of the buf_add() results; 0 means both succeeded.
 */
int
session_capa_add(struct buf *opb, u_int8_t capa_code, u_int8_t capa_len)
{
	int	failed;

	failed = buf_add(opb, &capa_code, sizeof(capa_code));
	failed += buf_add(opb, &capa_len, sizeof(capa_len));

	return (failed);
}
1267 
/*
 * Append the 4-byte multiprotocol capability value to buf:
 * AFI in network byte order, one zero byte, then the SAFI.
 * Returns the sum of the buf_add() results; 0 means all succeeded.
 */
int
session_capa_add_mp(struct buf *buf, u_int16_t afi, u_int8_t safi)
{
	u_int16_t	 net_afi = htons(afi);
	u_int8_t	 reserved = 0;
	int		 failed = 0;

	failed += buf_add(buf, &net_afi, sizeof(net_afi));
	failed += buf_add(buf, &reserved, sizeof(reserved));
	failed += buf_add(buf, &safi, sizeof(safi));

	return (failed);
}
1281 
1282 struct bgp_msg *
1283 session_newmsg(enum msg_type msgtype, u_int16_t len)
1284 {
1285 	struct bgp_msg		*msg;
1286 	struct msg_header	 hdr;
1287 	struct buf		*buf;
1288 	int			 errs = 0;
1289 
1290 	memset(&hdr.marker, 0xff, sizeof(hdr.marker));
1291 	hdr.len = htons(len);
1292 	hdr.type = msgtype;
1293 
1294 	if ((buf = buf_open(len)) == NULL)
1295 		return (NULL);
1296 
1297 	errs += buf_add(buf, &hdr.marker, sizeof(hdr.marker));
1298 	errs += buf_add(buf, &hdr.len, sizeof(hdr.len));
1299 	errs += buf_add(buf, &hdr.type, sizeof(hdr.type));
1300 
1301 	if (errs > 0 ||
1302 	    (msg = calloc(1, sizeof(*msg))) == NULL) {
1303 		buf_free(buf);
1304 		return (NULL);
1305 	}
1306 
1307 	msg->buf = buf;
1308 	msg->type = msgtype;
1309 	msg->len = len;
1310 
1311 	return (msg);
1312 }
1313 
1314 int
1315 session_sendmsg(struct bgp_msg *msg, struct peer *p)
1316 {
1317 	struct mrt		*mrt;
1318 
1319 	LIST_FOREACH(mrt, &mrthead, entry) {
1320 		if (!(mrt->type == MRT_ALL_OUT || (msg->type == UPDATE &&
1321 		    mrt->type == MRT_UPDATE_OUT)))
1322 			continue;
1323 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1324 		    mrt->peer_id == p->conf.id || (mrt->group_id == 0 &&
1325 		    mrt->group_id == p->conf.groupid))
1326 			mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p);
1327 	}
1328 
1329 	buf_close(&p->wbuf, msg->buf);
1330 	free(msg);
1331 	return (0);
1332 }
1333 
1334 void
1335 session_open(struct peer *p)
1336 {
1337 	struct bgp_msg		*buf;
1338 	struct buf		*opb;
1339 	struct msg_open		 msg;
1340 	u_int16_t		 len;
1341 	u_int8_t		 op_type, optparamlen = 0;
1342 	u_int			 errs = 0;
1343 
1344 
1345 	if ((opb = buf_dynamic(0, UCHAR_MAX - sizeof(op_type) -
1346 	    sizeof(optparamlen))) == NULL) {
1347 		bgp_fsm(p, EVNT_CON_FATAL);
1348 		return;
1349 	}
1350 
1351 	/* multiprotocol extensions, RFC 4760 */
1352 	if (p->capa.ann.mp_v4) {	/* 4 bytes data */
1353 		errs += session_capa_add(opb, CAPA_MP, 4);
1354 		errs += session_capa_add_mp(opb, AFI_IPv4, p->capa.ann.mp_v4);
1355 	}
1356 	if (p->capa.ann.mp_v6) {	/* 4 bytes data */
1357 		errs += session_capa_add(opb, CAPA_MP, 4);
1358 		errs += session_capa_add_mp(opb, AFI_IPv6, p->capa.ann.mp_v6);
1359 	}
1360 
1361 	/* route refresh, RFC 2918 */
1362 	if (p->capa.ann.refresh)	/* no data */
1363 		errs += session_capa_add(opb, CAPA_REFRESH, 0);
1364 
1365 	/* End-of-RIB marker, RFC 4724 */
1366 	if (p->capa.ann.restart) {	/* 2 bytes data */
1367 		u_char		c[2];
1368 
1369 		c[0] = 0x80; /* we're always restarting */
1370 		c[1] = 0;
1371 		errs += session_capa_add(opb, CAPA_RESTART, 2);
1372 		errs += buf_add(opb, &c, 2);
1373 	}
1374 
1375 	/* 4-bytes AS numbers, draft-ietf-idr-as4bytes-13 */
1376 	if (p->capa.ann.as4byte) {	/* 4 bytes data */
1377 		u_int32_t	nas;
1378 
1379 		nas = htonl(conf->as);
1380 		errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(nas));
1381 		errs += buf_add(opb, &nas, sizeof(nas));
1382 	}
1383 
1384 	if (buf_size(opb))
1385 		optparamlen = buf_size(opb) + sizeof(op_type) +
1386 		    sizeof(optparamlen);
1387 
1388 	len = MSGSIZE_OPEN_MIN + optparamlen;
1389 	if (errs || (buf = session_newmsg(OPEN, len)) == NULL) {
1390 		buf_free(opb);
1391 		bgp_fsm(p, EVNT_CON_FATAL);
1392 		return;
1393 	}
1394 
1395 	msg.version = 4;
1396 	msg.myas = htons(conf->short_as);
1397 	if (p->conf.holdtime)
1398 		msg.holdtime = htons(p->conf.holdtime);
1399 	else
1400 		msg.holdtime = htons(conf->holdtime);
1401 	msg.bgpid = conf->bgpid;	/* is already in network byte order */
1402 	msg.optparamlen = optparamlen;
1403 
1404 	errs += buf_add(buf->buf, &msg.version, sizeof(msg.version));
1405 	errs += buf_add(buf->buf, &msg.myas, sizeof(msg.myas));
1406 	errs += buf_add(buf->buf, &msg.holdtime, sizeof(msg.holdtime));
1407 	errs += buf_add(buf->buf, &msg.bgpid, sizeof(msg.bgpid));
1408 	errs += buf_add(buf->buf, &msg.optparamlen, sizeof(msg.optparamlen));
1409 
1410 	if (optparamlen) {
1411 		op_type = OPT_PARAM_CAPABILITIES;
1412 		optparamlen = buf_size(opb);
1413 		errs += buf_add(buf->buf, &op_type, sizeof(op_type));
1414 		errs += buf_add(buf->buf, &optparamlen, sizeof(optparamlen));
1415 		errs += buf_add(buf->buf, opb->buf, buf_size(opb));
1416 	}
1417 
1418 	buf_free(opb);
1419 
1420 	if (errs > 0) {
1421 		buf_free(buf->buf);
1422 		free(buf);
1423 		bgp_fsm(p, EVNT_CON_FATAL);
1424 		return;
1425 	}
1426 
1427 	if (session_sendmsg(buf, p) == -1) {
1428 		bgp_fsm(p, EVNT_CON_FATAL);
1429 		return;
1430 	}
1431 
1432 	p->stats.msg_sent_open++;
1433 }
1434 
1435 void
1436 session_keepalive(struct peer *p)
1437 {
1438 	struct bgp_msg		*buf;
1439 
1440 	if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL ||
1441 	    session_sendmsg(buf, p) == -1) {
1442 		bgp_fsm(p, EVNT_CON_FATAL);
1443 		return;
1444 	}
1445 
1446 	start_timer_keepalive(p);
1447 	p->stats.msg_sent_keepalive++;
1448 }
1449 
1450 void
1451 session_update(u_int32_t peerid, void *data, size_t datalen)
1452 {
1453 	struct peer		*p;
1454 	struct bgp_msg		*buf;
1455 
1456 	if ((p = getpeerbyid(peerid)) == NULL) {
1457 		log_warnx("no such peer: id=%u", peerid);
1458 		return;
1459 	}
1460 
1461 	if (p->state != STATE_ESTABLISHED)
1462 		return;
1463 
1464 	if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + datalen)) == NULL) {
1465 		bgp_fsm(p, EVNT_CON_FATAL);
1466 		return;
1467 	}
1468 
1469 	if (buf_add(buf->buf, data, datalen)) {
1470 		buf_free(buf->buf);
1471 		free(buf);
1472 		bgp_fsm(p, EVNT_CON_FATAL);
1473 		return;
1474 	}
1475 
1476 	if (session_sendmsg(buf, p) == -1) {
1477 		bgp_fsm(p, EVNT_CON_FATAL);
1478 		return;
1479 	}
1480 
1481 	start_timer_keepalive(p);
1482 	p->stats.msg_sent_update++;
1483 }
1484 
1485 void
1486 session_notification(struct peer *p, u_int8_t errcode, u_int8_t subcode,
1487     void *data, ssize_t datalen)
1488 {
1489 	struct bgp_msg		*buf;
1490 	u_int			 errs = 0;
1491 
1492 	if (p->stats.last_sent_errcode)	/* some notification already sent */
1493 		return;
1494 
1495 	if ((buf = session_newmsg(NOTIFICATION,
1496 	    MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) {
1497 		bgp_fsm(p, EVNT_CON_FATAL);
1498 		return;
1499 	}
1500 
1501 	errs += buf_add(buf->buf, &errcode, sizeof(errcode));
1502 	errs += buf_add(buf->buf, &subcode, sizeof(subcode));
1503 
1504 	if (datalen > 0)
1505 		errs += buf_add(buf->buf, data, datalen);
1506 
1507 	if (errs > 0) {
1508 		buf_free(buf->buf);
1509 		free(buf);
1510 		bgp_fsm(p, EVNT_CON_FATAL);
1511 		return;
1512 	}
1513 
1514 	if (session_sendmsg(buf, p) == -1) {
1515 		bgp_fsm(p, EVNT_CON_FATAL);
1516 		return;
1517 	}
1518 
1519 	p->stats.msg_sent_notification++;
1520 	p->stats.last_sent_errcode = errcode;
1521 	p->stats.last_sent_suberr = subcode;
1522 }
1523 
1524 int
1525 session_neighbor_rrefresh(struct peer *p)
1526 {
1527 	if (!p->capa.peer.refresh)
1528 		return (-1);
1529 
1530 	if (p->capa.peer.mp_v4 != SAFI_NONE)
1531 		session_rrefresh(p, AFI_IPv4, p->capa.peer.mp_v4);
1532 	if (p->capa.peer.mp_v6 != SAFI_NONE)
1533 		session_rrefresh(p, AFI_IPv6, p->capa.peer.mp_v6);
1534 
1535 	return (0);
1536 }
1537 
1538 void
1539 session_rrefresh(struct peer *p, u_int16_t afi, u_int8_t safi)
1540 {
1541 	struct bgp_msg		*buf;
1542 	int			 errs = 0;
1543 	u_int8_t		 null8 = 0;
1544 
1545 	if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) {
1546 		bgp_fsm(p, EVNT_CON_FATAL);
1547 		return;
1548 	}
1549 
1550 	afi = htons(afi);
1551 	errs += buf_add(buf->buf, &afi, sizeof(afi));
1552 	errs += buf_add(buf->buf, &null8, sizeof(null8));
1553 	errs += buf_add(buf->buf, &safi, sizeof(safi));
1554 
1555 	if (errs > 0) {
1556 		buf_free(buf->buf);
1557 		free(buf);
1558 		bgp_fsm(p, EVNT_CON_FATAL);
1559 		return;
1560 	}
1561 
1562 	if (session_sendmsg(buf, p) == -1) {
1563 		bgp_fsm(p, EVNT_CON_FATAL);
1564 		return;
1565 	}
1566 
1567 	p->stats.msg_sent_rrefresh++;
1568 }
1569 
1570 int
1571 session_dispatch_msg(struct pollfd *pfd, struct peer *p)
1572 {
1573 	ssize_t		n, rpos, av, left;
1574 	socklen_t	len;
1575 	int		error, processed = 0;
1576 	u_int16_t	msglen;
1577 	u_int8_t	msgtype;
1578 
1579 	if (p->state == STATE_CONNECT) {
1580 		if (pfd->revents & POLLOUT) {
1581 			if (pfd->revents & POLLIN) {
1582 				/* error occurred */
1583 				len = sizeof(error);
1584 				if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR,
1585 				    &error, &len) == -1 || error) {
1586 					if (error)
1587 						errno = error;
1588 					if (errno != p->lasterr) {
1589 						log_peer_warn(&p->conf,
1590 						    "socket error");
1591 						p->lasterr = errno;
1592 					}
1593 					bgp_fsm(p, EVNT_CON_OPENFAIL);
1594 					return (1);
1595 				}
1596 			}
1597 			bgp_fsm(p, EVNT_CON_OPEN);
1598 			return (1);
1599 		}
1600 		if (pfd->revents & POLLHUP) {
1601 			bgp_fsm(p, EVNT_CON_OPENFAIL);
1602 			return (1);
1603 		}
1604 		if (pfd->revents & (POLLERR|POLLNVAL)) {
1605 			bgp_fsm(p, EVNT_CON_FATAL);
1606 			return (1);
1607 		}
1608 		return (0);
1609 	}
1610 
1611 	if (pfd->revents & POLLHUP) {
1612 		bgp_fsm(p, EVNT_CON_CLOSED);
1613 		return (1);
1614 	}
1615 	if (pfd->revents & (POLLERR|POLLNVAL)) {
1616 		bgp_fsm(p, EVNT_CON_FATAL);
1617 		return (1);
1618 	}
1619 
1620 	if (pfd->revents & POLLOUT && p->wbuf.queued) {
1621 		if ((error = msgbuf_write(&p->wbuf)) < 0) {
1622 			if (error == -2)
1623 				log_peer_warnx(&p->conf, "Connection closed");
1624 			else
1625 				log_peer_warn(&p->conf, "write error");
1626 			bgp_fsm(p, EVNT_CON_FATAL);
1627 			return (1);
1628 		}
1629 		if (!(pfd->revents & POLLIN))
1630 			return (1);
1631 	}
1632 
1633 	if (p->rbuf && pfd->revents & POLLIN) {
1634 		if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos,
1635 		    sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) {
1636 			if (errno != EINTR && errno != EAGAIN) {
1637 				log_peer_warn(&p->conf, "read error");
1638 				bgp_fsm(p, EVNT_CON_FATAL);
1639 			}
1640 			return (1);
1641 		}
1642 		if (n == 0) {	/* connection closed */
1643 			bgp_fsm(p, EVNT_CON_CLOSED);
1644 			return (1);
1645 		}
1646 
1647 		rpos = 0;
1648 		av = p->rbuf->wpos + n;
1649 		p->stats.last_read = time(NULL);
1650 
1651 		/*
1652 		 * session might drop to IDLE -> buffers deallocated
1653 		 * we MUST check rbuf != NULL before use
1654 		 */
1655 		for (;;) {
1656 			if (rpos + MSGSIZE_HEADER > av)
1657 				break;
1658 			if (p->rbuf == NULL)
1659 				break;
1660 			if (parse_header(p, p->rbuf->buf + rpos, &msglen,
1661 			    &msgtype) == -1)
1662 				return (0);
1663 			if (rpos + msglen > av)
1664 				break;
1665 			p->rbuf->rptr = p->rbuf->buf + rpos;
1666 
1667 			switch (msgtype) {
1668 			case OPEN:
1669 				bgp_fsm(p, EVNT_RCVD_OPEN);
1670 				p->stats.msg_rcvd_open++;
1671 				break;
1672 			case UPDATE:
1673 				bgp_fsm(p, EVNT_RCVD_UPDATE);
1674 				p->stats.msg_rcvd_update++;
1675 				break;
1676 			case NOTIFICATION:
1677 				bgp_fsm(p, EVNT_RCVD_NOTIFICATION);
1678 				p->stats.msg_rcvd_notification++;
1679 				break;
1680 			case KEEPALIVE:
1681 				bgp_fsm(p, EVNT_RCVD_KEEPALIVE);
1682 				p->stats.msg_rcvd_keepalive++;
1683 				break;
1684 			case RREFRESH:
1685 				parse_refresh(p);
1686 				p->stats.msg_rcvd_rrefresh++;
1687 				break;
1688 			default:	/* cannot happen */
1689 				session_notification(p, ERR_HEADER,
1690 				    ERR_HDR_TYPE, &msgtype, 1);
1691 				log_warnx("received message with "
1692 				    "unknown type %u", msgtype);
1693 				bgp_fsm(p, EVNT_CON_FATAL);
1694 			}
1695 			rpos += msglen;
1696 			if (++processed > MSG_PROCESS_LIMIT)
1697 				break;
1698 		}
1699 		if (p->rbuf == NULL)
1700 			return (1);
1701 
1702 		if (rpos < av) {
1703 			left = av - rpos;
1704 			memcpy(&p->rbuf->buf, p->rbuf->buf + rpos, left);
1705 			p->rbuf->wpos = left;
1706 		} else
1707 			p->rbuf->wpos = 0;
1708 
1709 		return (1);
1710 	}
1711 	return (0);
1712 }
1713 
1714 int
1715 parse_header(struct peer *peer, u_char *data, u_int16_t *len, u_int8_t *type)
1716 {
1717 	struct mrt		*mrt;
1718 	u_char			*p;
1719 	u_int16_t		 olen;
1720 	static const u_int8_t	 marker[MSGSIZE_HEADER_MARKER] = { 0xff, 0xff,
1721 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1722 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
1723 
1724 	/* caller MUST make sure we are getting 19 bytes! */
1725 	p = data;
1726 	if (memcmp(p, marker, sizeof(marker))) {
1727 		log_peer_warnx(&peer->conf, "sync error");
1728 		session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL, 0);
1729 		bgp_fsm(peer, EVNT_CON_FATAL);
1730 		return (-1);
1731 	}
1732 	p += MSGSIZE_HEADER_MARKER;
1733 
1734 	memcpy(&olen, p, 2);
1735 	*len = ntohs(olen);
1736 	p += 2;
1737 	memcpy(type, p, 1);
1738 
1739 	if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) {
1740 		log_peer_warnx(&peer->conf,
1741 		    "received message: illegal length: %u byte", *len);
1742 		session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1743 		    &olen, sizeof(olen));
1744 		bgp_fsm(peer, EVNT_CON_FATAL);
1745 		return (-1);
1746 	}
1747 
1748 	switch (*type) {
1749 	case OPEN:
1750 		if (*len < MSGSIZE_OPEN_MIN) {
1751 			log_peer_warnx(&peer->conf,
1752 			    "received OPEN: illegal len: %u byte", *len);
1753 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1754 			    &olen, sizeof(olen));
1755 			bgp_fsm(peer, EVNT_CON_FATAL);
1756 			return (-1);
1757 		}
1758 		break;
1759 	case NOTIFICATION:
1760 		if (*len < MSGSIZE_NOTIFICATION_MIN) {
1761 			log_peer_warnx(&peer->conf,
1762 			    "received NOTIFICATION: illegal len: %u byte",
1763 			    *len);
1764 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1765 			    &olen, sizeof(olen));
1766 			bgp_fsm(peer, EVNT_CON_FATAL);
1767 			return (-1);
1768 		}
1769 		break;
1770 	case UPDATE:
1771 		if (*len < MSGSIZE_UPDATE_MIN) {
1772 			log_peer_warnx(&peer->conf,
1773 			    "received UPDATE: illegal len: %u byte", *len);
1774 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1775 			    &olen, sizeof(olen));
1776 			bgp_fsm(peer, EVNT_CON_FATAL);
1777 			return (-1);
1778 		}
1779 		break;
1780 	case KEEPALIVE:
1781 		if (*len != MSGSIZE_KEEPALIVE) {
1782 			log_peer_warnx(&peer->conf,
1783 			    "received KEEPALIVE: illegal len: %u byte", *len);
1784 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1785 			    &olen, sizeof(olen));
1786 			bgp_fsm(peer, EVNT_CON_FATAL);
1787 			return (-1);
1788 		}
1789 		break;
1790 	case RREFRESH:
1791 		if (*len != MSGSIZE_RREFRESH) {
1792 			log_peer_warnx(&peer->conf,
1793 			    "received RREFRESH: illegal len: %u byte", *len);
1794 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1795 			    &olen, sizeof(olen));
1796 			bgp_fsm(peer, EVNT_CON_FATAL);
1797 			return (-1);
1798 		}
1799 		break;
1800 	default:
1801 		log_peer_warnx(&peer->conf,
1802 		    "received msg with unknown type %u", *type);
1803 		session_notification(peer, ERR_HEADER, ERR_HDR_TYPE,
1804 		    type, 1);
1805 		bgp_fsm(peer, EVNT_CON_FATAL);
1806 		return (-1);
1807 	}
1808 	LIST_FOREACH(mrt, &mrthead, entry) {
1809 		if (!(mrt->type == MRT_ALL_IN || (*type == UPDATE &&
1810 		    mrt->type == MRT_UPDATE_IN)))
1811 			continue;
1812 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1813 		    mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
1814 		    mrt->group_id == peer->conf.groupid))
1815 			mrt_dump_bgp_msg(mrt, data, *len, peer);
1816 	}
1817 	return (0);
1818 }
1819 
1820 int
1821 parse_open(struct peer *peer)
1822 {
1823 	u_char		*p, *op_val;
1824 	u_int8_t	 version, rversion;
1825 	u_int16_t	 short_as, msglen;
1826 	u_int16_t	 holdtime, oholdtime, myholdtime;
1827 	u_int32_t	 as, bgpid;
1828 	u_int8_t	 optparamlen, plen;
1829 	u_int8_t	 op_type, op_len;
1830 
1831 	p = peer->rbuf->rptr;
1832 	p += MSGSIZE_HEADER_MARKER;
1833 	memcpy(&msglen, p, sizeof(msglen));
1834 	msglen = ntohs(msglen);
1835 
1836 	p = peer->rbuf->rptr;
1837 	p += MSGSIZE_HEADER;	/* header is already checked */
1838 
1839 	memcpy(&version, p, sizeof(version));
1840 	p += sizeof(version);
1841 
1842 	if (version != BGP_VERSION) {
1843 		log_peer_warnx(&peer->conf,
1844 		    "peer wants unrecognized version %u", version);
1845 		if (version > BGP_VERSION)
1846 			rversion = version - BGP_VERSION;
1847 		else
1848 			rversion = BGP_VERSION;
1849 		session_notification(peer, ERR_OPEN, ERR_OPEN_VERSION,
1850 		    &rversion, sizeof(rversion));
1851 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1852 		return (-1);
1853 	}
1854 
1855 	memcpy(&short_as, p, sizeof(short_as));
1856 	p += sizeof(short_as);
1857 	as = peer->short_as = ntohs(short_as);
1858 
1859 	memcpy(&oholdtime, p, sizeof(oholdtime));
1860 	p += sizeof(oholdtime);
1861 
1862 	holdtime = ntohs(oholdtime);
1863 	if (holdtime && holdtime < peer->conf.min_holdtime) {
1864 		log_peer_warnx(&peer->conf,
1865 		    "peer requests unacceptable holdtime %u", holdtime);
1866 		session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME,
1867 		    NULL, 0);
1868 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1869 		return (-1);
1870 	}
1871 
1872 	myholdtime = peer->conf.holdtime;
1873 	if (!myholdtime)
1874 		myholdtime = conf->holdtime;
1875 	if (holdtime < myholdtime)
1876 		peer->holdtime = holdtime;
1877 	else
1878 		peer->holdtime = myholdtime;
1879 
1880 	memcpy(&bgpid, p, sizeof(bgpid));
1881 	p += sizeof(bgpid);
1882 
1883 	/* check bgpid for validity - just disallow 0 */
1884 	if (ntohl(bgpid) == 0) {
1885 		log_peer_warnx(&peer->conf, "peer BGPID %lu unacceptable",
1886 		    ntohl(bgpid));
1887 		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID,
1888 		    NULL, 0);
1889 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1890 		return (-1);
1891 	}
1892 	peer->remote_bgpid = bgpid;
1893 
1894 	memcpy(&optparamlen, p, sizeof(optparamlen));
1895 	p += sizeof(optparamlen);
1896 
1897 	if (optparamlen != msglen - MSGSIZE_OPEN_MIN) {
1898 			log_peer_warnx(&peer->conf,
1899 			    "corrupt OPEN message received: length mismatch");
1900 			session_notification(peer, ERR_OPEN, 0, NULL, 0);
1901 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1902 			return (-1);
1903 	}
1904 
1905 	plen = optparamlen;
1906 	while (plen > 0) {
1907 		if (plen < 2) {
1908 			log_peer_warnx(&peer->conf,
1909 			    "corrupt OPEN message received, len wrong");
1910 			session_notification(peer, ERR_OPEN, 0, NULL, 0);
1911 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1912 			return (-1);
1913 		}
1914 		memcpy(&op_type, p, sizeof(op_type));
1915 		p += sizeof(op_type);
1916 		plen -= sizeof(op_type);
1917 		memcpy(&op_len, p, sizeof(op_len));
1918 		p += sizeof(op_len);
1919 		plen -= sizeof(op_len);
1920 		if (op_len > 0) {
1921 			if (plen < op_len) {
1922 				log_peer_warnx(&peer->conf,
1923 				    "corrupt OPEN message received, len wrong");
1924 				session_notification(peer, ERR_OPEN, 0,
1925 				    NULL, 0);
1926 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1927 				return (-1);
1928 			}
1929 			op_val = p;
1930 			p += op_len;
1931 			plen -= op_len;
1932 		} else
1933 			op_val = NULL;
1934 
1935 		switch (op_type) {
1936 		case OPT_PARAM_CAPABILITIES:		/* RFC 3392 */
1937 			if (parse_capabilities(peer, op_val, op_len,
1938 			    &as) == -1) {
1939 				session_notification(peer, ERR_OPEN, 0,
1940 				    NULL, 0);
1941 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1942 				return (-1);
1943 			}
1944 			break;
1945 		case OPT_PARAM_AUTH:			/* deprecated */
1946 		default:
1947 			/*
1948 			 * unsupported type
1949 			 * the RFCs tell us to leave the data section empty
1950 			 * and notify the peer with ERR_OPEN, ERR_OPEN_OPT.
1951 			 * How the peer should know _which_ optional parameter
1952 			 * we don't support is beyond me.
1953 			 */
1954 			log_peer_warnx(&peer->conf,
1955 			    "received OPEN message with unsupported optional "
1956 			    "parameter: type %u", op_type);
1957 			session_notification(peer, ERR_OPEN, ERR_OPEN_OPT,
1958 				NULL, 0);
1959 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1960 			timer_set(peer, Timer_IdleHold, 0);	/* no punish */
1961 			peer->IdleHoldTime /= 2;
1962 			return (-1);
1963 		}
1964 	}
1965 
1966 	/* if remote-as is zero and it's a cloned neighbor, accept any */
1967 	if (peer->conf.cloned && !peer->conf.remote_as && as != AS_TRANS) {
1968 		peer->conf.remote_as = as;
1969 		peer->conf.ebgp = (peer->conf.remote_as != conf->as);
1970 		if (!peer->conf.ebgp)
1971 			/* force enforce_as off for iBGP sessions */
1972 			peer->conf.enforce_as = ENFORCE_AS_OFF;
1973 	}
1974 
1975 	if (peer->conf.remote_as != as) {
1976 		log_peer_warnx(&peer->conf, "peer sent wrong AS %s",
1977 		    log_as(as));
1978 		session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL, 0);
1979 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1980 		return (-1);
1981 	}
1982 
1983 	return (0);
1984 }
1985 
1986 int
1987 parse_update(struct peer *peer)
1988 {
1989 	u_char		*p;
1990 	u_int16_t	 datalen;
1991 
1992 	/*
1993 	 * we pass the message verbatim to the rde.
1994 	 * in case of errors the whole session is reset with a
1995 	 * notification anyway, we only need to know the peer
1996 	 */
1997 	p = peer->rbuf->rptr;
1998 	p += MSGSIZE_HEADER_MARKER;
1999 	memcpy(&datalen, p, sizeof(datalen));
2000 	datalen = ntohs(datalen);
2001 
2002 	p = peer->rbuf->rptr;
2003 	p += MSGSIZE_HEADER;	/* header is already checked */
2004 	datalen -= MSGSIZE_HEADER;
2005 
2006 	if (imsg_compose(ibuf_rde, IMSG_UPDATE, peer->conf.id, 0, -1, p,
2007 	    datalen) == -1)
2008 		return (-1);
2009 
2010 	return (0);
2011 }
2012 
2013 int
2014 parse_refresh(struct peer *peer)
2015 {
2016 	u_char		*p;
2017 	struct rrefresh	 r;
2018 
2019 	p = peer->rbuf->rptr;
2020 	p += MSGSIZE_HEADER;	/* header is already checked */
2021 
2022 	/* afi, 2 byte */
2023 	memcpy(&r.afi, p, sizeof(r.afi));
2024 	r.afi = ntohs(r.afi);
2025 	p += 2;
2026 	/* reserved, 1 byte */
2027 	p += 1;
2028 	/* safi, 1 byte */
2029 	memcpy(&r.safi, p, sizeof(r.safi));
2030 
2031 	/* afi/safi unchecked -	unrecognized values will be ignored anyway */
2032 
2033 	if (imsg_compose(ibuf_rde, IMSG_REFRESH, peer->conf.id, 0, -1, &r,
2034 	    sizeof(r)) == -1)
2035 		return (-1);
2036 
2037 	return (0);
2038 }
2039 
2040 int
2041 parse_notification(struct peer *peer)
2042 {
2043 	u_char		*p;
2044 	u_int8_t	 errcode;
2045 	u_int8_t	 subcode;
2046 	u_int16_t	 datalen;
2047 	u_int8_t	 capa_code;
2048 	u_int8_t	 capa_len;
2049 
2050 	/* just log */
2051 	p = peer->rbuf->rptr;
2052 	p += MSGSIZE_HEADER_MARKER;
2053 	memcpy(&datalen, p, sizeof(datalen));
2054 	datalen = ntohs(datalen);
2055 
2056 	p = peer->rbuf->rptr;
2057 	p += MSGSIZE_HEADER;	/* header is already checked */
2058 	datalen -= MSGSIZE_HEADER;
2059 
2060 	memcpy(&errcode, p, sizeof(errcode));
2061 	p += sizeof(errcode);
2062 	datalen -= sizeof(errcode);
2063 
2064 	memcpy(&subcode, p, sizeof(subcode));
2065 	p += sizeof(subcode);
2066 	datalen -= sizeof(subcode);
2067 
2068 	log_notification(peer, errcode, subcode, p, datalen);
2069 	peer->errcnt++;
2070 
2071 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_CAPA) {
2072 		if (datalen == 0) {	/* zebra likes to send those.. humbug */
2073 			log_peer_warnx(&peer->conf, "received \"unsupported "
2074 			    "capability\" notification without data part, "
2075 			    "disabling capability announcements altogether");
2076 			session_capa_ann_none(peer);
2077 		}
2078 
2079 		while (datalen > 0) {
2080 			if (datalen < 2) {
2081 				log_peer_warnx(&peer->conf,
2082 				    "parse_notification: "
2083 				    "expect len >= 2, len is %u", datalen);
2084 				return (-1);
2085 			}
2086 			memcpy(&capa_code, p, sizeof(capa_code));
2087 			p += sizeof(capa_code);
2088 			datalen -= sizeof(capa_code);
2089 			memcpy(&capa_len, p, sizeof(capa_len));
2090 			p += sizeof(capa_len);
2091 			datalen -= sizeof(capa_len);
2092 			if (datalen < capa_len) {
2093 				log_peer_warnx(&peer->conf,
2094 				    "parse_notification: capa_len %u exceeds "
2095 				    "remaining msg length %u", capa_len,
2096 				    datalen);
2097 				return (-1);
2098 			}
2099 			p += capa_len;
2100 			datalen -= capa_len;
2101 			switch (capa_code) {
2102 			case CAPA_MP:
2103 				peer->capa.ann.mp_v4 = SAFI_NONE;
2104 				peer->capa.ann.mp_v6 = SAFI_NONE;
2105 				log_peer_warnx(&peer->conf,
2106 				    "disabling multiprotocol capability");
2107 				break;
2108 			case CAPA_REFRESH:
2109 				peer->capa.ann.refresh = 0;
2110 				log_peer_warnx(&peer->conf,
2111 				    "disabling route refresh capability");
2112 				break;
2113 			case CAPA_RESTART:
2114 				peer->capa.ann.restart = 0;
2115 				log_peer_warnx(&peer->conf,
2116 				    "disabling restart capability");
2117 				break;
2118 			case CAPA_AS4BYTE:
2119 				peer->capa.ann.as4byte = 0;
2120 				log_peer_warnx(&peer->conf,
2121 				    "disabling 4-byte AS num capability");
2122 				break;
2123 			default:	/* should not happen... */
2124 				log_peer_warnx(&peer->conf, "received "
2125 				    "\"unsupported capability\" notification "
2126 				    "for unknown capability %u, disabling "
2127 				    "capability announcements altogether",
2128 				    capa_code);
2129 				session_capa_ann_none(peer);
2130 				break;
2131 			}
2132 		}
2133 
2134 		return (1);
2135 	}
2136 
2137 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_OPT) {
2138 		session_capa_ann_none(peer);
2139 		return (1);
2140 	}
2141 
2142 	return (0);
2143 }
2144 
2145 int
2146 parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as)
2147 {
2148 	u_int16_t	 len;
2149 	u_int8_t	 capa_code;
2150 	u_int8_t	 capa_len;
2151 	u_char		*capa_val;
2152 	u_int16_t	 mp_afi;
2153 	u_int8_t	 mp_safi;
2154 	u_int32_t	 remote_as;
2155 
2156 	len = dlen;
2157 	while (len > 0) {
2158 		if (len < 2) {
2159 			log_peer_warnx(&peer->conf, "parse_capabilities: "
2160 			    "expect len >= 2, len is %u", len);
2161 			return (-1);
2162 		}
2163 		memcpy(&capa_code, d, sizeof(capa_code));
2164 		d += sizeof(capa_code);
2165 		len -= sizeof(capa_code);
2166 		memcpy(&capa_len, d, sizeof(capa_len));
2167 		d += sizeof(capa_len);
2168 		len -= sizeof(capa_len);
2169 		if (capa_len > 0) {
2170 			if (len < capa_len) {
2171 				log_peer_warnx(&peer->conf,
2172 				    "parse_capabilities: "
2173 				    "len %u smaller than capa_len %u",
2174 				    len, capa_len);
2175 				return (-1);
2176 			}
2177 			capa_val = d;
2178 			d += capa_len;
2179 			len -= capa_len;
2180 		} else
2181 			capa_val = NULL;
2182 
2183 		switch (capa_code) {
2184 		case CAPA_MP:			/* RFC 4760 */
2185 			if (capa_len != 4) {
2186 				log_peer_warnx(&peer->conf,
2187 				    "parse_capabilities: "
2188 				    "expect len 4, len is %u", capa_len);
2189 				return (-1);
2190 			}
2191 			memcpy(&mp_afi, capa_val, sizeof(mp_afi));
2192 			mp_afi = ntohs(mp_afi);
2193 			memcpy(&mp_safi, capa_val + 3, sizeof(mp_safi));
2194 			switch (mp_afi) {
2195 			case AFI_IPv4:
2196 				if (mp_safi < 1 || mp_safi > 3)
2197 					log_peer_warnx(&peer->conf,
2198 					    "parse_capabilities: AFI IPv4, "
2199 					    "mp_safi %u unknown", mp_safi);
2200 				else
2201 					peer->capa.peer.mp_v4 = mp_safi;
2202 				break;
2203 			case AFI_IPv6:
2204 				if (mp_safi < 1 || mp_safi > 3)
2205 					log_peer_warnx(&peer->conf,
2206 					    "parse_capabilities: AFI IPv6, "
2207 					    "mp_safi %u unknown", mp_safi);
2208 				else
2209 					peer->capa.peer.mp_v6 = mp_safi;
2210 				break;
2211 			default:			/* ignore */
2212 				break;
2213 			}
2214 			break;
2215 		case CAPA_REFRESH:
2216 			peer->capa.peer.refresh = 1;
2217 			break;
2218 		case CAPA_RESTART:
2219 			peer->capa.peer.restart = 1;
2220 			/* we don't care about the further restart capas yet */
2221 			break;
2222 		case CAPA_AS4BYTE:
2223 			if (capa_len != 4) {
2224 				log_peer_warnx(&peer->conf,
2225 				    "parse_capabilities: "
2226 				    "expect len 4, len is %u", capa_len);
2227 				return (-1);
2228 			}
2229 			memcpy(&remote_as, capa_val, sizeof(remote_as));
2230 			*as = ntohl(remote_as);
2231 			peer->capa.peer.as4byte = 1;
2232 			break;
2233 		default:
2234 			break;
2235 		}
2236 	}
2237 
2238 	return (0);
2239 }
2240 
2241 void
2242 session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt)
2243 {
2244 	struct imsg		 imsg;
2245 	struct mrt		 xmrt;
2246 	struct mrt		*mrt;
2247 	struct peer_config	*pconf;
2248 	struct peer		*p, *next;
2249 	struct listen_addr	*la, *nla;
2250 	struct kif		*kif;
2251 	u_char			*data;
2252 	enum reconf_action	 reconf;
2253 	int			 n, depend_ok;
2254 	u_int8_t		 errcode, subcode;
2255 
2256 	if ((n = imsg_read(ibuf)) == -1)
2257 		fatal("session_dispatch_imsg: imsg_read error");
2258 
2259 	if (n == 0)	/* connection closed */
2260 		fatalx("session_dispatch_imsg: pipe closed");
2261 
2262 	for (;;) {
2263 		if ((n = imsg_get(ibuf, &imsg)) == -1)
2264 			fatal("session_dispatch_imsg: imsg_get error");
2265 
2266 		if (n == 0)
2267 			break;
2268 
2269 		switch (imsg.hdr.type) {
2270 		case IMSG_RECONF_CONF:
2271 			if (idx != PFD_PIPE_MAIN)
2272 				fatalx("reconf request not from parent");
2273 			if ((nconf = malloc(sizeof(struct bgpd_config))) ==
2274 			    NULL)
2275 				fatal(NULL);
2276 			memcpy(nconf, imsg.data, sizeof(struct bgpd_config));
2277 			if ((nconf->listen_addrs = calloc(1,
2278 			    sizeof(struct listen_addrs))) == NULL)
2279 				fatal(NULL);
2280 			TAILQ_INIT(nconf->listen_addrs);
2281 			npeers = NULL;
2282 			init_conf(nconf);
2283 			pending_reconf = 1;
2284 			break;
2285 		case IMSG_RECONF_PEER:
2286 			if (idx != PFD_PIPE_MAIN)
2287 				fatalx("reconf request not from parent");
2288 			pconf = imsg.data;
2289 			p = getpeerbyaddr(&pconf->remote_addr);
2290 			if (p == NULL) {
2291 				if ((p = calloc(1, sizeof(struct peer))) ==
2292 				    NULL)
2293 					fatal("new_peer");
2294 				p->state = p->prev_state = STATE_NONE;
2295 				p->next = npeers;
2296 				npeers = p;
2297 				reconf = RECONF_REINIT;
2298 			} else
2299 				reconf = RECONF_KEEP;
2300 
2301 			memcpy(&p->conf, pconf, sizeof(struct peer_config));
2302 			p->conf.reconf_action = reconf;
2303 			break;
2304 		case IMSG_RECONF_LISTENER:
2305 			if (idx != PFD_PIPE_MAIN)
2306 				fatalx("reconf request not from parent");
2307 			if (nconf == NULL)
2308 				fatalx("IMSG_RECONF_LISTENER but no config");
2309 			nla = imsg.data;
2310 			TAILQ_FOREACH(la, conf->listen_addrs, entry)
2311 				if (!la_cmp(la, nla))
2312 					break;
2313 
2314 			if (la == NULL) {
2315 				if (nla->reconf != RECONF_REINIT)
2316 					fatalx("king bula sez: "
2317 					    "expected REINIT");
2318 
2319 				if ((nla->fd = imsg.fd) == -1)
2320 					log_warnx("expected to receive fd for "
2321 					    "%s but didn't receive any",
2322 					    log_sockaddr((struct sockaddr *)
2323 					    &nla->sa));
2324 
2325 				la = calloc(1, sizeof(struct listen_addr));
2326 				if (la == NULL)
2327 					fatal(NULL);
2328 				memcpy(&la->sa, &nla->sa, sizeof(la->sa));
2329 				la->flags = nla->flags;
2330 				la->fd = nla->fd;
2331 				la->reconf = RECONF_REINIT;
2332 				TAILQ_INSERT_TAIL(nconf->listen_addrs, la,
2333 				    entry);
2334 			} else {
2335 				if (nla->reconf != RECONF_KEEP)
2336 					fatalx("king bula sez: expected KEEP");
2337 				la->reconf = RECONF_KEEP;
2338 			}
2339 
2340 			break;
2341 		case IMSG_RECONF_DONE:
2342 			if (idx != PFD_PIPE_MAIN)
2343 				fatalx("reconf request not from parent");
2344 			if (nconf == NULL)
2345 				fatalx("got IMSG_RECONF_DONE but no config");
2346 			conf->as = nconf->as;
2347 			conf->holdtime = nconf->holdtime;
2348 			conf->bgpid = nconf->bgpid;
2349 			conf->min_holdtime = nconf->min_holdtime;
2350 
2351 			/* add new peers */
2352 			for (p = npeers; p != NULL; p = next) {
2353 				next = p->next;
2354 				p->next = peers;
2355 				peers = p;
2356 			}
2357 			/* find ones that need attention */
2358 			for (p = peers; p != NULL; p = p->next) {
2359 				/* needs to be deleted? */
2360 				if (p->conf.reconf_action == RECONF_NONE &&
2361 				    !p->conf.cloned)
2362 					p->conf.reconf_action = RECONF_DELETE;
2363 				/* had demotion, is demoted, demote removed? */
2364 				if (p->demoted && !p->conf.demote_group[0])
2365 						session_demote(p, -1);
2366 			}
2367 
2368 			/* delete old listeners */
2369 			for (la = TAILQ_FIRST(conf->listen_addrs); la != NULL;
2370 			    la = nla) {
2371 				nla = TAILQ_NEXT(la, entry);
2372 				if (la->reconf == RECONF_NONE) {
2373 					log_info("not listening on %s any more",
2374 					    log_sockaddr(
2375 					    (struct sockaddr *)&la->sa));
2376 					TAILQ_REMOVE(conf->listen_addrs, la,
2377 					    entry);
2378 					close(la->fd);
2379 					free(la);
2380 				}
2381 			}
2382 
2383 			/* add new listeners */
2384 			while ((la = TAILQ_FIRST(nconf->listen_addrs)) !=
2385 			    NULL) {
2386 				TAILQ_REMOVE(nconf->listen_addrs, la, entry);
2387 				TAILQ_INSERT_TAIL(conf->listen_addrs, la,
2388 				    entry);
2389 			}
2390 
2391 			setup_listeners(listener_cnt);
2392 			free(nconf->listen_addrs);
2393 			free(nconf);
2394 			nconf = NULL;
2395 			pending_reconf = 0;
2396 			log_info("SE reconfigured");
2397 			break;
2398 		case IMSG_IFINFO:
2399 			if (idx != PFD_PIPE_MAIN)
2400 				fatalx("IFINFO message not from parent");
2401 			if (imsg.hdr.len != IMSG_HEADER_SIZE +
2402 			    sizeof(struct kif))
2403 				fatalx("IFINFO imsg with wrong len");
2404 			kif = imsg.data;
2405 			depend_ok = (kif->flags & IFF_UP) &&
2406 			    (LINK_STATE_IS_UP(kif->link_state) ||
2407 			    (kif->link_state == LINK_STATE_UNKNOWN &&
2408 			    kif->media_type != IFT_CARP));
2409 
2410 			for (p = peers; p != NULL; p = p->next)
2411 				if (!strcmp(p->conf.if_depend, kif->ifname)) {
2412 					if (depend_ok && !p->depend_ok) {
2413 						p->depend_ok = depend_ok;
2414 						bgp_fsm(p, EVNT_START);
2415 					} else if (!depend_ok && p->depend_ok) {
2416 						p->depend_ok = depend_ok;
2417 						session_stop(p,
2418 						    ERR_CEASE_OTHER_CHANGE);
2419 					}
2420 				}
2421 			break;
2422 		case IMSG_MRT_OPEN:
2423 		case IMSG_MRT_REOPEN:
2424 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2425 			    sizeof(struct mrt)) {
2426 				log_warnx("wrong imsg len");
2427 				break;
2428 			}
2429 
2430 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
2431 			if ((xmrt.wbuf.fd = imsg.fd) == -1)
2432 				log_warnx("expected to receive fd for mrt dump "
2433 				    "but didn't receive any");
2434 
2435 			mrt = mrt_get(&mrthead, &xmrt);
2436 			if (mrt == NULL) {
2437 				/* new dump */
2438 				mrt = calloc(1, sizeof(struct mrt));
2439 				if (mrt == NULL)
2440 					fatal("session_dispatch_imsg");
2441 				memcpy(mrt, &xmrt, sizeof(struct mrt));
2442 				TAILQ_INIT(&mrt->wbuf.bufs);
2443 				LIST_INSERT_HEAD(&mrthead, mrt, entry);
2444 			} else {
2445 				/* old dump reopened */
2446 				close(mrt->wbuf.fd);
2447 				mrt->wbuf.fd = xmrt.wbuf.fd;
2448 			}
2449 			break;
2450 		case IMSG_MRT_CLOSE:
2451 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2452 			    sizeof(struct mrt)) {
2453 				log_warnx("wrong imsg len");
2454 				break;
2455 			}
2456 
2457 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
2458 			mrt = mrt_get(&mrthead, &xmrt);
2459 			if (mrt != NULL) {
2460 				mrt_clean(mrt);
2461 				LIST_REMOVE(mrt, entry);
2462 				free(mrt);
2463 			}
2464 			break;
2465 		case IMSG_CTL_KROUTE:
2466 		case IMSG_CTL_KROUTE6:
2467 		case IMSG_CTL_KROUTE_ADDR:
2468 		case IMSG_CTL_SHOW_NEXTHOP:
2469 		case IMSG_CTL_SHOW_INTERFACE:
2470 			if (idx != PFD_PIPE_MAIN)
2471 				fatalx("ctl kroute request not from parent");
2472 			control_imsg_relay(&imsg);
2473 			break;
2474 		case IMSG_CTL_SHOW_RIB:
2475 		case IMSG_CTL_SHOW_RIB_PREFIX:
2476 		case IMSG_CTL_SHOW_RIB_ATTR:
2477 		case IMSG_CTL_SHOW_RIB_MEM:
2478 		case IMSG_CTL_SHOW_NETWORK:
2479 		case IMSG_CTL_SHOW_NETWORK6:
2480 		case IMSG_CTL_SHOW_NEIGHBOR:
2481 			if (idx != PFD_PIPE_ROUTE_CTL)
2482 				fatalx("ctl rib request not from RDE");
2483 			control_imsg_relay(&imsg);
2484 			break;
2485 		case IMSG_CTL_END:
2486 		case IMSG_CTL_RESULT:
2487 			control_imsg_relay(&imsg);
2488 			break;
2489 		case IMSG_UPDATE:
2490 			if (idx != PFD_PIPE_ROUTE)
2491 				fatalx("update request not from RDE");
2492 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2493 			    MAX_PKTSIZE - MSGSIZE_HEADER ||
2494 			    imsg.hdr.len < IMSG_HEADER_SIZE +
2495 			    MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER)
2496 				log_warnx("RDE sent invalid update");
2497 			else
2498 				session_update(imsg.hdr.peerid, imsg.data,
2499 				    imsg.hdr.len - IMSG_HEADER_SIZE);
2500 			break;
2501 		case IMSG_UPDATE_ERR:
2502 			if (idx != PFD_PIPE_ROUTE)
2503 				fatalx("update request not from RDE");
2504 			if (imsg.hdr.len < IMSG_HEADER_SIZE + 2) {
2505 				log_warnx("RDE sent invalid notification");
2506 				break;
2507 			}
2508 			if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) {
2509 				log_warnx("no such peer: id=%u",
2510 				    imsg.hdr.peerid);
2511 				break;
2512 			}
2513 			data = imsg.data;
2514 			errcode = *data++;
2515 			subcode = *data++;
2516 
2517 			if (imsg.hdr.len == IMSG_HEADER_SIZE + 2)
2518 				data = NULL;
2519 
2520 			session_notification(p, errcode, subcode,
2521 			    data, imsg.hdr.len - IMSG_HEADER_SIZE - 2);
2522 			switch (errcode) {
2523 			case ERR_CEASE:
2524 				switch (subcode) {
2525 				case ERR_CEASE_MAX_PREFIX:
2526 					bgp_fsm(p, EVNT_STOP);
2527 					if (p->conf.max_prefix_restart)
2528 						timer_set(p, Timer_IdleHold, 60 *
2529 						    p->conf.max_prefix_restart);
2530 					break;
2531 				default:
2532 					bgp_fsm(p, EVNT_CON_FATAL);
2533 					break;
2534 				}
2535 				break;
2536 			default:
2537 				bgp_fsm(p, EVNT_CON_FATAL);
2538 				break;
2539 			}
2540 			break;
2541 		default:
2542 			break;
2543 		}
2544 		imsg_free(&imsg);
2545 	}
2546 }
2547 
2548 int
2549 la_cmp(struct listen_addr *a, struct listen_addr *b)
2550 {
2551 	struct sockaddr_in	*in_a, *in_b;
2552 	struct sockaddr_in6	*in6_a, *in6_b;
2553 
2554 	if (a->sa.ss_family != b->sa.ss_family)
2555 		return (1);
2556 
2557 	switch (a->sa.ss_family) {
2558 	case AF_INET:
2559 		in_a = (struct sockaddr_in *)&a->sa;
2560 		in_b = (struct sockaddr_in *)&b->sa;
2561 		if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr)
2562 			return (1);
2563 		if (in_a->sin_port != in_b->sin_port)
2564 			return (1);
2565 		break;
2566 	case AF_INET6:
2567 		in6_a = (struct sockaddr_in6 *)&a->sa;
2568 		in6_b = (struct sockaddr_in6 *)&b->sa;
2569 		if (bcmp(&in6_a->sin6_addr, &in6_b->sin6_addr,
2570 		    sizeof(struct in6_addr)))
2571 			return (1);
2572 		if (in6_a->sin6_port != in6_b->sin6_port)
2573 			return (1);
2574 		break;
2575 	default:
2576 		fatal("king bula sez: unknown address family");
2577 		/* NOTREACHED */
2578 	}
2579 
2580 	return (0);
2581 }
2582 
2583 struct peer *
2584 getpeerbyaddr(struct bgpd_addr *addr)
2585 {
2586 	struct peer *p;
2587 
2588 	/* we might want a more effective way to find peers by IP */
2589 	for (p = peers; p != NULL &&
2590 	    memcmp(&p->conf.remote_addr, addr, sizeof(p->conf.remote_addr));
2591 	    p = p->next)
2592 		;	/* nothing */
2593 
2594 	return (p);
2595 }
2596 
2597 struct peer *
2598 getpeerbydesc(const char *descr)
2599 {
2600 	struct peer	*p, *res = NULL;
2601 	int		 match = 0;
2602 
2603 	for (p = peers; p != NULL; p = p->next)
2604 		if (!strcmp(p->conf.descr, descr)) {
2605 			res = p;
2606 			match++;
2607 		}
2608 
2609 	if (match > 1)
2610 		log_info("neighbor description \"%s\" not unique, request "
2611 		    "aborted", descr);
2612 
2613 	if (match == 1)
2614 		return (res);
2615 	else
2616 		return (NULL);
2617 }
2618 
2619 struct peer *
2620 getpeerbyip(struct sockaddr *ip)
2621 {
2622 	struct peer	*p, *newpeer, *loose = NULL;
2623 	u_int32_t	 id;
2624 
2625 	/* we might want a more effective way to find peers by IP */
2626 	for (p = peers; p != NULL; p = p->next)
2627 		if (!p->conf.template &&
2628 		    p->conf.remote_addr.af == ip->sa_family) {
2629 			if (p->conf.remote_addr.af == AF_INET &&
2630 			    p->conf.remote_addr.v4.s_addr ==
2631 			    ((struct sockaddr_in *)ip)->sin_addr.s_addr)
2632 				return (p);
2633 			if (p->conf.remote_addr.af == AF_INET6 &&
2634 			    !bcmp(&p->conf.remote_addr.v6,
2635 			    &((struct sockaddr_in6 *)ip)->sin6_addr,
2636 			    sizeof(p->conf.remote_addr.v6)))
2637 				return (p);
2638 		}
2639 
2640 	/* try template matching */
2641 	for (p = peers; p != NULL; p = p->next)
2642 		if (p->conf.template &&
2643 		    p->conf.remote_addr.af == ip->sa_family &&
2644 		    session_match_mask(p, ip))
2645 			if (loose == NULL || loose->conf.remote_masklen <
2646 			    p->conf.remote_masklen)
2647 				loose = p;
2648 
2649 	if (loose != NULL) {
2650 		/* clone */
2651 		if ((newpeer = malloc(sizeof(struct peer))) == NULL)
2652 			fatal(NULL);
2653 		memcpy(newpeer, loose, sizeof(struct peer));
2654 		for (id = UINT_MAX; id > UINT_MAX / 2; id--) {
2655 			for (p = peers; p != NULL && p->conf.id != id;
2656 			    p = p->next)
2657 				;	/* nothing */
2658 			if (p == NULL) {	/* we found a free id */
2659 				newpeer->conf.id = id;
2660 				break;
2661 			}
2662 		}
2663 		if (newpeer->conf.remote_addr.af == AF_INET) {
2664 			newpeer->conf.remote_addr.v4.s_addr =
2665 			    ((struct sockaddr_in *)ip)->sin_addr.s_addr;
2666 			newpeer->conf.remote_masklen = 32;
2667 		}
2668 		if (newpeer->conf.remote_addr.af == AF_INET6) {
2669 			memcpy(&newpeer->conf.remote_addr.v6,
2670 			    &((struct sockaddr_in6 *)ip)->sin6_addr,
2671 			    sizeof(newpeer->conf.remote_addr.v6));
2672 			newpeer->conf.remote_masklen = 128;
2673 		}
2674 		newpeer->conf.template = 0;
2675 		newpeer->conf.cloned = 1;
2676 		newpeer->state = newpeer->prev_state = STATE_NONE;
2677 		newpeer->conf.reconf_action = RECONF_KEEP;
2678 		newpeer->rbuf = NULL;
2679 		init_peer(newpeer);
2680 		bgp_fsm(newpeer, EVNT_START);
2681 		newpeer->next = peers;
2682 		peers = newpeer;
2683 		return (newpeer);
2684 	}
2685 
2686 	return (NULL);
2687 }
2688 
2689 int
2690 session_match_mask(struct peer *p, struct sockaddr *ip)
2691 {
2692 	int		 i;
2693 	in_addr_t	 v4mask;
2694 	struct in6_addr	*in;
2695 	struct in6_addr	 mask;
2696 
2697 	if (p->conf.remote_addr.af == AF_INET) {
2698 		v4mask = htonl(prefixlen2mask(p->conf.remote_masklen));
2699 		if (p->conf.remote_addr.v4.s_addr ==
2700 		    ((((struct sockaddr_in *)ip)->sin_addr.s_addr) & v4mask))
2701 			return (1);
2702 		else
2703 			return (0);
2704 	}
2705 
2706 	if (p->conf.remote_addr.af == AF_INET6) {
2707 		bzero(&mask, sizeof(mask));
2708 		for (i = 0; i < p->conf.remote_masklen / 8; i++)
2709 			mask.s6_addr[i] = 0xff;
2710 		i = p->conf.remote_masklen % 8;
2711 		if (i)
2712 			mask.s6_addr[p->conf.remote_masklen / 8] = 0xff00 >> i;
2713 
2714 		in = &((struct sockaddr_in6 *)ip)->sin6_addr;
2715 
2716 		for (i = 0; i < 16; i++)
2717 			if ((in->s6_addr[i] & mask.s6_addr[i]) !=
2718 			    p->conf.remote_addr.addr8[i])
2719 				return (0);
2720 
2721 		return (1);
2722 	}
2723 
2724 	return (0);
2725 }
2726 
2727 struct peer *
2728 getpeerbyid(u_int32_t peerid)
2729 {
2730 	struct peer *p;
2731 
2732 	/* we might want a more effective way to find peers by IP */
2733 	for (p = peers; p != NULL &&
2734 	    p->conf.id != peerid; p = p->next)
2735 		;	/* nothing */
2736 
2737 	return (p);
2738 }
2739 
2740 void
2741 session_down(struct peer *peer)
2742 {
2743 	peer->stats.last_updown = time(NULL);
2744 	if (imsg_compose(ibuf_rde, IMSG_SESSION_DOWN, peer->conf.id, 0, -1,
2745 	    NULL, 0) == -1)
2746 		fatalx("imsg_compose error");
2747 }
2748 
2749 void
2750 session_up(struct peer *p)
2751 {
2752 	struct session_up	 sup;
2753 
2754 	if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, p->conf.id, 0, -1,
2755 	    &p->conf, sizeof(p->conf)) == -1)
2756 		fatalx("imsg_compose error");
2757 
2758 	switch (p->sa_local.ss_family) {
2759 	case AF_INET:
2760 		sup.local_addr.af = AF_INET;
2761 		memcpy(&sup.local_addr.v4,
2762 		    &((struct sockaddr_in *)&p->sa_local)->sin_addr,
2763 		    sizeof(sup.local_addr.v4));
2764 		sup.remote_addr.af = AF_INET;
2765 		memcpy(&sup.remote_addr.v4,
2766 		    &((struct sockaddr_in *)&p->sa_remote)->sin_addr,
2767 		    sizeof(sup.remote_addr.v4));
2768 		break;
2769 	case AF_INET6:
2770 		sup.local_addr.af = AF_INET6;
2771 		memcpy(&sup.local_addr.v6,
2772 		    &((struct sockaddr_in6 *)&p->sa_local)->sin6_addr,
2773 		    sizeof(sup.local_addr.v6));
2774 		sup.remote_addr.af = AF_INET6;
2775 		memcpy(&sup.remote_addr.v6,
2776 		    &((struct sockaddr_in6 *)&p->sa_remote)->sin6_addr,
2777 		    sizeof(sup.remote_addr.v6));
2778 		break;
2779 	default:
2780 		fatalx("session_up: unsupported address family");
2781 	}
2782 
2783 	sup.remote_bgpid = p->remote_bgpid;
2784 	sup.short_as = p->short_as;
2785 	memcpy(&sup.capa_announced, &p->capa.ann, sizeof(sup.capa_announced));
2786 	memcpy(&sup.capa_received, &p->capa.peer, sizeof(sup.capa_received));
2787 	p->stats.last_updown = time(NULL);
2788 	if (imsg_compose(ibuf_rde, IMSG_SESSION_UP, p->conf.id, 0, -1,
2789 	    &sup, sizeof(sup)) == -1)
2790 		fatalx("imsg_compose error");
2791 }
2792 
2793 int
2794 imsg_compose_parent(int type, pid_t pid, void *data, u_int16_t datalen)
2795 {
2796 	return (imsg_compose(ibuf_main, type, 0, pid, -1, data, datalen));
2797 }
2798 
2799 int
2800 imsg_compose_rde(int type, pid_t pid, void *data, u_int16_t datalen)
2801 {
2802 	return (imsg_compose(ibuf_rde, type, 0, pid, -1, data, datalen));
2803 }
2804 
2805 static struct sockaddr *
2806 addr2sa(struct bgpd_addr *addr, u_int16_t port)
2807 {
2808 	static struct sockaddr_storage	 ss;
2809 	struct sockaddr_in		*sa_in = (struct sockaddr_in *)&ss;
2810 	struct sockaddr_in6		*sa_in6 = (struct sockaddr_in6 *)&ss;
2811 
2812 	bzero(&ss, sizeof(ss));
2813 	switch (addr->af) {
2814 	case AF_INET:
2815 		sa_in->sin_family = AF_INET;
2816 		sa_in->sin_len = sizeof(struct sockaddr_in);
2817 		sa_in->sin_addr.s_addr = addr->v4.s_addr;
2818 		sa_in->sin_port = htons(port);
2819 		break;
2820 	case AF_INET6:
2821 		sa_in6->sin6_family = AF_INET6;
2822 		sa_in6->sin6_len = sizeof(struct sockaddr_in6);
2823 		memcpy(&sa_in6->sin6_addr, &addr->v6,
2824 		    sizeof(sa_in6->sin6_addr));
2825 		sa_in6->sin6_port = htons(port);
2826 		sa_in6->sin6_scope_id = addr->scope_id;
2827 		break;
2828 	}
2829 
2830 	return ((struct sockaddr *)&ss);
2831 }
2832 
2833 void
2834 session_demote(struct peer *p, int level)
2835 {
2836 	struct demote_msg	msg;
2837 
2838 	strlcpy(msg.demote_group, p->conf.demote_group,
2839 	    sizeof(msg.demote_group));
2840 	msg.level = level;
2841 	if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1,
2842 	    &msg, sizeof(msg)) == -1)
2843 		fatalx("imsg_compose error");
2844 
2845 	p->demoted += level;
2846 }
2847 
2848 void
2849 session_stop(struct peer *peer, u_int8_t subcode)
2850 {
2851 	switch (peer->state) {
2852 	case STATE_OPENSENT:
2853 	case STATE_OPENCONFIRM:
2854 	case STATE_ESTABLISHED:
2855 		session_notification(peer, ERR_CEASE, subcode, NULL, 0);
2856 		break;
2857 	default:
2858 		/* session not open, no need to send notification */
2859 		break;
2860 	}
2861 	bgp_fsm(peer, EVNT_STOP);
2862 }
2863