xref: /openbsd-src/usr.sbin/bgpd/session.c (revision 50b7afb2c2c0993b0894d4e34bf857cb13ed9c80)
1 /*	$OpenBSD: session.c,v 1.334 2014/01/22 04:08:08 claudio Exp $ */
2 
3 /*
4  * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 
21 #include <sys/mman.h>
22 #include <sys/socket.h>
23 #include <sys/time.h>
24 #include <sys/resource.h>
25 #include <sys/un.h>
26 #include <net/if_types.h>
27 #include <netinet/in.h>
28 #include <netinet/in_systm.h>
29 #include <netinet/ip.h>
30 #include <netinet/tcp.h>
31 #include <arpa/inet.h>
32 #include <limits.h>
33 
34 #include <err.h>
35 #include <errno.h>
36 #include <fcntl.h>
37 #include <poll.h>
38 #include <pwd.h>
39 #include <signal.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <unistd.h>
44 
45 #include "bgpd.h"
46 #include "mrt.h"
47 #include "session.h"
48 
/*
 * Fixed slots at the front of the pollfd array that session_main() rebuilds
 * on every loop iteration.  Listener sockets follow, starting at
 * PFD_LISTENERS_START.
 */
49 #define PFD_PIPE_MAIN		0	/* ibuf_main->fd */
50 #define PFD_PIPE_ROUTE		1	/* ibuf_rde->fd */
51 #define PFD_PIPE_ROUTE_CTL	2	/* ibuf_rde_ctl->fd */
52 #define PFD_SOCK_CTL		3	/* csock */
53 #define PFD_SOCK_RCTL		4	/* rcsock */
54 #define PFD_SOCK_PFKEY		5	/* socket returned by pfkey_init() */
55 #define PFD_LISTENERS_START	6	/* first listen_addr socket */
56 
/*
 * Function prototypes for the session engine code in this file.
 */
57 void	session_sighdlr(int);
58 int	setup_listeners(u_int *);
59 void	init_conf(struct bgpd_config *);
60 void	init_peer(struct peer *);
61 void	start_timer_holdtime(struct peer *);
62 void	start_timer_keepalive(struct peer *);
63 void	session_close_connection(struct peer *);
64 void	change_state(struct peer *, enum session_state, enum session_events);
65 int	session_setup_socket(struct peer *);
66 void	session_accept(int);
67 int	session_connect(struct peer *);
68 void	session_tcp_established(struct peer *);
69 void	session_capa_ann_none(struct peer *);
70 int	session_capa_add(struct ibuf *, u_int8_t, u_int8_t);
71 int	session_capa_add_mp(struct ibuf *, u_int8_t);
72 int	session_capa_add_gr(struct peer *, struct ibuf *, u_int8_t);
73 struct bgp_msg	*session_newmsg(enum msg_type, u_int16_t);
74 int	session_sendmsg(struct bgp_msg *, struct peer *);
75 void	session_open(struct peer *);
76 void	session_keepalive(struct peer *);
77 void	session_update(u_int32_t, void *, size_t);
78 void	session_notification(struct peer *, u_int8_t, u_int8_t, void *,
79 	    ssize_t);
80 void	session_rrefresh(struct peer *, u_int8_t);
81 int	session_graceful_restart(struct peer *);
82 int	session_graceful_stop(struct peer *);
83 int	session_dispatch_msg(struct pollfd *, struct peer *);
84 int	session_process_msg(struct peer *);
85 int	parse_header(struct peer *, u_char *, u_int16_t *, u_int8_t *);
86 int	parse_open(struct peer *);
87 int	parse_update(struct peer *);
88 int	parse_refresh(struct peer *);
89 int	parse_notification(struct peer *);
90 int	parse_capabilities(struct peer *, u_char *, u_int16_t, u_int32_t *);
91 int	capa_neg_calc(struct peer *);
92 void	session_dispatch_imsg(struct imsgbuf *, int, u_int *);
93 void	session_up(struct peer *);
94 void	session_down(struct peer *);
95 void	session_demote(struct peer *, int);
96 
/* peer and listener lookup helpers */
97 int		 la_cmp(struct listen_addr *, struct listen_addr *);
98 struct peer	*getpeerbyip(struct sockaddr *);
99 void		 session_template_clone(struct peer *, struct sockaddr *,
100 		    u_int32_t, u_int32_t);
101 int		 session_match_mask(struct peer *, struct bgpd_addr *);
102 struct peer	*getpeerbyid(u_int32_t);
103 
/*
 * Session engine state shared by the functions in this file.
 */
/* conf is allocated in session_main(); nconf presumably holds a pending reload -- confirm */
104 struct bgpd_config	*conf, *nconf;
105 struct bgpd_sysdep	 sysdep;
/* peers is the singly linked list (via ->next) walked by session_main() */
106 struct peer		*peers, *npeers;
/* set by session_sighdlr(); a non-zero value ends the session_main() loop */
107 volatile sig_atomic_t	 session_quit;
/* while non-zero, session_main() skips its peer init/delete pass */
108 int			 pending_reconf;
/* control sockets polled in session_main(); -1 until set elsewhere */
109 int			 csock = -1, rcsock = -1;
/* incremented by init_peer(), decremented when session_main() removes a peer */
110 u_int			 peer_cnt;
/* imsg channels, allocated and initialised in session_main() */
111 struct imsgbuf		*ibuf_rde;
112 struct imsgbuf		*ibuf_rde_ctl;
113 struct imsgbuf		*ibuf_main;
114 
/* list of mrt dump descriptors */
115 struct mrt_head		 mrthead;
/* getmonotime() stamp of the last accept() fd exhaustion; 0 means accepting */
116 time_t			 pauseaccept;
117 
118 void
119 session_sighdlr(int sig)
120 {
121 	switch (sig) {
122 	case SIGINT:
123 	case SIGTERM:
124 		session_quit = 1;
125 		break;
126 	}
127 }
128 
129 int
130 setup_listeners(u_int *la_cnt)
131 {
132 	int			 ttl = 255;
133 	int			 opt;
134 	struct listen_addr	*la;
135 	u_int			 cnt = 0;
136 
137 	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
138 		la->reconf = RECONF_NONE;
139 		cnt++;
140 
141 		if (la->flags & LISTENER_LISTENING)
142 			continue;
143 
144 		if (la->fd == -1) {
145 			log_warn("cannot establish listener on %s: invalid fd",
146 			    log_sockaddr((struct sockaddr *)&la->sa));
147 			continue;
148 		}
149 
150 		opt = 1;
151 		if (setsockopt(la->fd, IPPROTO_TCP, TCP_MD5SIG,
152 		    &opt, sizeof(opt)) == -1) {
153 			if (errno == ENOPROTOOPT) {	/* system w/o md5sig */
154 				log_warnx("md5sig not available, disabling");
155 				sysdep.no_md5sig = 1;
156 			} else
157 				fatal("setsockopt TCP_MD5SIG");
158 		}
159 
160 		/* set ttl to 255 so that ttl-security works */
161 		if (la->sa.ss_family == AF_INET && setsockopt(la->fd,
162 		    IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) {
163 			log_warn("setup_listeners setsockopt TTL");
164 			continue;
165 		}
166 		if (la->sa.ss_family == AF_INET6 && setsockopt(la->fd,
167 		    IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) == -1) {
168 			log_warn("setup_listeners setsockopt hoplimit");
169 			continue;
170 		}
171 
172 		session_socket_blockmode(la->fd, BM_NONBLOCK);
173 
174 		if (listen(la->fd, MAX_BACKLOG)) {
175 			close(la->fd);
176 			fatal("listen");
177 		}
178 
179 		la->flags |= LISTENER_LISTENING;
180 
181 		log_info("listening on %s",
182 		    log_sockaddr((struct sockaddr *)&la->sa));
183 	}
184 
185 	*la_cnt = cnt;
186 
187 	return (0);
188 }
189 
190 pid_t
191 session_main(int pipe_m2s[2], int pipe_s2r[2], int pipe_m2r[2],
192     int pipe_s2rctl[2])
193 {
194 	int			 nfds, timeout, pfkeysock;
195 	unsigned int		 i, j, idx_peers, idx_listeners, idx_mrts;
196 	pid_t			 pid;
197 	u_int			 pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0;
198 	u_int			 listener_cnt, ctl_cnt, mrt_cnt;
199 	u_int			 new_cnt;
200 	u_int32_t		 ctl_queued;
201 	struct passwd		*pw;
202 	struct peer		*p, **peer_l = NULL, *last, *next;
203 	struct mrt		*m, *xm, **mrt_l = NULL;
204 	struct pollfd		*pfd = NULL;
205 	struct ctl_conn		*ctl_conn;
206 	struct listen_addr	*la;
207 	void			*newp;
208 	short			 events;
209 
210 	switch (pid = fork()) {
211 	case -1:
212 		fatal("cannot fork");
213 	case 0:
214 		break;
215 	default:
216 		return (pid);
217 	}
218 
219 	if ((pw = getpwnam(BGPD_USER)) == NULL)
220 		fatal(NULL);
221 
222 	if (chroot(pw->pw_dir) == -1)
223 		fatal("chroot");
224 	if (chdir("/") == -1)
225 		fatal("chdir(\"/\")");
226 
227 	setproctitle("session engine");
228 	bgpd_process = PROC_SE;
229 	pfkeysock = pfkey_init(&sysdep);
230 
231 	if (setgroups(1, &pw->pw_gid) ||
232 	    setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) ||
233 	    setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
234 		fatal("can't drop privileges");
235 
236 	signal(SIGTERM, session_sighdlr);
237 	signal(SIGINT, session_sighdlr);
238 	signal(SIGPIPE, SIG_IGN);
239 	signal(SIGHUP, SIG_IGN);
240 	signal(SIGALRM, SIG_IGN);
241 	signal(SIGUSR1, SIG_IGN);
242 
243 	close(pipe_m2s[0]);
244 	close(pipe_s2r[1]);
245 	close(pipe_s2rctl[1]);
246 	close(pipe_m2r[0]);
247 	close(pipe_m2r[1]);
248 	if ((ibuf_rde = malloc(sizeof(struct imsgbuf))) == NULL ||
249 	    (ibuf_rde_ctl = malloc(sizeof(struct imsgbuf))) == NULL ||
250 	    (ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL)
251 		fatal(NULL);
252 	imsg_init(ibuf_rde, pipe_s2r[0]);
253 	imsg_init(ibuf_rde_ctl, pipe_s2rctl[0]);
254 	imsg_init(ibuf_main, pipe_m2s[1]);
255 
256 	TAILQ_INIT(&ctl_conns);
257 	LIST_INIT(&mrthead);
258 	listener_cnt = 0;
259 	peer_cnt = 0;
260 	ctl_cnt = 0;
261 
262 	if ((conf = calloc(1, sizeof(struct bgpd_config))) == NULL)
263 		fatal(NULL);
264 	if ((conf->listen_addrs = calloc(1, sizeof(struct listen_addrs))) ==
265 	    NULL)
266 		fatal(NULL);
267 	TAILQ_INIT(conf->listen_addrs);
268 
269 	log_info("session engine ready");
270 
271 	while (session_quit == 0) {
272 		/* check for peers to be initialized or deleted */
273 		last = NULL;
274 		if (!pending_reconf) {
275 			for (p = peers; p != NULL; p = next) {
276 				next = p->next;
277 				/* cloned peer that idled out? */
278 				if (p->template && (p->state == STATE_IDLE ||
279 				    p->state == STATE_ACTIVE) &&
280 				    time(NULL) - p->stats.last_updown >=
281 				    INTERVAL_HOLD_CLONED)
282 					p->conf.reconf_action = RECONF_DELETE;
283 
284 				/* new peer that needs init? */
285 				if (p->state == STATE_NONE)
286 					init_peer(p);
287 
288 				/* reinit due? */
289 				if (p->conf.reconf_action == RECONF_REINIT) {
290 					session_stop(p, ERR_CEASE_ADMIN_RESET);
291 					if (!p->conf.down)
292 						timer_set(p, Timer_IdleHold, 0);
293 				}
294 
295 				/* deletion due? */
296 				if (p->conf.reconf_action == RECONF_DELETE) {
297 					if (p->demoted)
298 						session_demote(p, -1);
299 					p->conf.demote_group[0] = 0;
300 					session_stop(p, ERR_CEASE_PEER_UNCONF);
301 					log_peer_warnx(&p->conf, "removed");
302 					if (last != NULL)
303 						last->next = next;
304 					else
305 						peers = next;
306 					timer_remove_all(p);
307 					free(p);
308 					peer_cnt--;
309 					continue;
310 				}
311 				p->conf.reconf_action = RECONF_NONE;
312 				last = p;
313 			}
314 		}
315 
316 		if (peer_cnt > peer_l_elms) {
317 			if ((newp = realloc(peer_l, sizeof(struct peer *) *
318 			    peer_cnt)) == NULL) {
319 				/* panic for now  */
320 				log_warn("could not resize peer_l from %u -> %u"
321 				    " entries", peer_l_elms, peer_cnt);
322 				fatalx("exiting");
323 			}
324 			peer_l = newp;
325 			peer_l_elms = peer_cnt;
326 		}
327 
328 		mrt_cnt = 0;
329 		for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) {
330 			xm = LIST_NEXT(m, entry);
331 			if (m->state == MRT_STATE_REMOVE) {
332 				mrt_clean(m);
333 				LIST_REMOVE(m, entry);
334 				free(m);
335 				continue;
336 			}
337 			if (m->wbuf.queued)
338 				mrt_cnt++;
339 		}
340 
341 		if (mrt_cnt > mrt_l_elms) {
342 			if ((newp = realloc(mrt_l, sizeof(struct mrt *) *
343 			    mrt_cnt)) == NULL) {
344 				/* panic for now  */
345 				log_warn("could not resize mrt_l from %u -> %u"
346 				    " entries", mrt_l_elms, mrt_cnt);
347 				fatalx("exiting");
348 			}
349 			mrt_l = newp;
350 			mrt_l_elms = mrt_cnt;
351 		}
352 
353 		new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt +
354 		    ctl_cnt + mrt_cnt;
355 		if (new_cnt > pfd_elms) {
356 			if ((newp = realloc(pfd, sizeof(struct pollfd) *
357 			    new_cnt)) == NULL) {
358 				/* panic for now  */
359 				log_warn("could not resize pfd from %u -> %u"
360 				    " entries", pfd_elms, new_cnt);
361 				fatalx("exiting");
362 			}
363 			pfd = newp;
364 			pfd_elms = new_cnt;
365 		}
366 
367 		bzero(pfd, sizeof(struct pollfd) * pfd_elms);
368 		pfd[PFD_PIPE_MAIN].fd = ibuf_main->fd;
369 		pfd[PFD_PIPE_MAIN].events = POLLIN;
370 		if (ibuf_main->w.queued > 0)
371 			pfd[PFD_PIPE_MAIN].events |= POLLOUT;
372 		pfd[PFD_PIPE_ROUTE].fd = ibuf_rde->fd;
373 		pfd[PFD_PIPE_ROUTE].events = POLLIN;
374 		if (ibuf_rde->w.queued > 0)
375 			pfd[PFD_PIPE_ROUTE].events |= POLLOUT;
376 
377 		ctl_queued = 0;
378 		TAILQ_FOREACH(ctl_conn, &ctl_conns, entry)
379 			ctl_queued += ctl_conn->ibuf.w.queued;
380 
381 		pfd[PFD_PIPE_ROUTE_CTL].fd = ibuf_rde_ctl->fd;
382 		if (ctl_queued < SESSION_CTL_QUEUE_MAX)
383 			/*
384 			 * Do not act as unlimited buffer. Don't read in more
385 			 * messages if the ctl sockets are getting full.
386 			 */
387 			pfd[PFD_PIPE_ROUTE_CTL].events = POLLIN;
388 		if (pauseaccept == 0) {
389 			pfd[PFD_SOCK_CTL].fd = csock;
390 			pfd[PFD_SOCK_CTL].events = POLLIN;
391 			pfd[PFD_SOCK_RCTL].fd = rcsock;
392 			pfd[PFD_SOCK_RCTL].events = POLLIN;
393 		} else {
394 			pfd[PFD_SOCK_CTL].fd = -1;
395 			pfd[PFD_SOCK_RCTL].fd = -1;
396 		}
397 		pfd[PFD_SOCK_PFKEY].fd = pfkeysock;
398 		pfd[PFD_SOCK_PFKEY].events = POLLIN;
399 
400 		i = PFD_LISTENERS_START;
401 		TAILQ_FOREACH(la, conf->listen_addrs, entry) {
402 			if (pauseaccept == 0) {
403 				pfd[i].fd = la->fd;
404 				pfd[i].events = POLLIN;
405 			} else
406 				pfd[i].fd = -1;
407 			i++;
408 		}
409 		idx_listeners = i;
410 		timeout = 240;	/* loop every 240s at least */
411 
412 		for (p = peers; p != NULL; p = p->next) {
413 			time_t	nextaction;
414 			struct peer_timer *pt;
415 
416 			/* check timers */
417 			if ((pt = timer_nextisdue(p)) != NULL) {
418 				switch (pt->type) {
419 				case Timer_Hold:
420 					bgp_fsm(p, EVNT_TIMER_HOLDTIME);
421 					break;
422 				case Timer_ConnectRetry:
423 					bgp_fsm(p, EVNT_TIMER_CONNRETRY);
424 					break;
425 				case Timer_Keepalive:
426 					bgp_fsm(p, EVNT_TIMER_KEEPALIVE);
427 					break;
428 				case Timer_IdleHold:
429 					bgp_fsm(p, EVNT_START);
430 					break;
431 				case Timer_IdleHoldReset:
432 					p->IdleHoldTime /= 2;
433 					if (p->IdleHoldTime <=
434 					    INTERVAL_IDLE_HOLD_INITIAL) {
435 						p->IdleHoldTime =
436 						    INTERVAL_IDLE_HOLD_INITIAL;
437 						timer_stop(p,
438 						    Timer_IdleHoldReset);
439 						p->errcnt = 0;
440 					} else
441 						timer_set(p,
442 						    Timer_IdleHoldReset,
443 						    p->IdleHoldTime);
444 					break;
445 				case Timer_CarpUndemote:
446 					timer_stop(p, Timer_CarpUndemote);
447 					if (p->demoted &&
448 					    p->state == STATE_ESTABLISHED)
449 						session_demote(p, -1);
450 					break;
451 				case Timer_RestartTimeout:
452 					timer_stop(p, Timer_RestartTimeout);
453 					session_graceful_stop(p);
454 					break;
455 				default:
456 					fatalx("King Bula lost in time");
457 				}
458 			}
459 			if ((nextaction = timer_nextduein(p)) != -1 &&
460 			    nextaction < timeout)
461 				timeout = nextaction;
462 
463 			/* are we waiting for a write? */
464 			events = POLLIN;
465 			if (p->wbuf.queued > 0 || p->state == STATE_CONNECT)
466 				events |= POLLOUT;
467 			/* is there still work to do? */
468 			if (p->rbuf && p->rbuf->wpos)
469 				timeout = 0;
470 
471 			/* poll events */
472 			if (p->fd != -1 && events != 0) {
473 				pfd[i].fd = p->fd;
474 				pfd[i].events = events;
475 				peer_l[i - idx_listeners] = p;
476 				i++;
477 			}
478 		}
479 
480 		idx_peers = i;
481 
482 		LIST_FOREACH(m, &mrthead, entry)
483 			if (m->wbuf.queued) {
484 				pfd[i].fd = m->wbuf.fd;
485 				pfd[i].events = POLLOUT;
486 				mrt_l[i - idx_peers] = m;
487 				i++;
488 			}
489 
490 		idx_mrts = i;
491 
492 		TAILQ_FOREACH(ctl_conn, &ctl_conns, entry) {
493 			pfd[i].fd = ctl_conn->ibuf.fd;
494 			pfd[i].events = POLLIN;
495 			if (ctl_conn->ibuf.w.queued > 0)
496 				pfd[i].events |= POLLOUT;
497 			i++;
498 		}
499 
500 		if (pauseaccept && timeout > 1)
501 			timeout = 1;
502 		if (timeout < 0)
503 			timeout = 0;
504 		if ((nfds = poll(pfd, i, timeout * 1000)) == -1)
505 			if (errno != EINTR)
506 				fatal("poll error");
507 
508 		/*
509 		 * If we previously saw fd exhaustion, we stop accept()
510 		 * for 1 second to throttle the accept() loop.
511 		 */
512 		if (pauseaccept && getmonotime() > pauseaccept + 1)
513 			pauseaccept = 0;
514 
515 		if (nfds > 0 && pfd[PFD_PIPE_MAIN].revents & POLLOUT)
516 			if (msgbuf_write(&ibuf_main->w) <= 0 && errno != EAGAIN)
517 				fatal("pipe write error");
518 
519 		if (nfds > 0 && pfd[PFD_PIPE_MAIN].revents & POLLIN) {
520 			nfds--;
521 			session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN,
522 			    &listener_cnt);
523 		}
524 
525 		if (nfds > 0 && pfd[PFD_PIPE_ROUTE].revents & POLLOUT)
526 			if (msgbuf_write(&ibuf_rde->w) <= 0 && errno != EAGAIN)
527 				fatal("pipe write error");
528 
529 		if (nfds > 0 && pfd[PFD_PIPE_ROUTE].revents & POLLIN) {
530 			nfds--;
531 			session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE,
532 			    &listener_cnt);
533 		}
534 
535 		if (nfds > 0 && pfd[PFD_PIPE_ROUTE_CTL].revents & POLLIN) {
536 			nfds--;
537 			session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL,
538 			    &listener_cnt);
539 		}
540 
541 		if (nfds > 0 && pfd[PFD_SOCK_CTL].revents & POLLIN) {
542 			nfds--;
543 			ctl_cnt += control_accept(csock, 0);
544 		}
545 
546 		if (nfds > 0 && pfd[PFD_SOCK_RCTL].revents & POLLIN) {
547 			nfds--;
548 			ctl_cnt += control_accept(rcsock, 1);
549 		}
550 
551 		if (nfds > 0 && pfd[PFD_SOCK_PFKEY].revents & POLLIN) {
552 			nfds--;
553 			if (pfkey_read(pfkeysock, NULL) == -1) {
554 				log_warnx("pfkey_read failed, exiting...");
555 				session_quit = 1;
556 			}
557 		}
558 
559 		for (j = PFD_LISTENERS_START; nfds > 0 && j < idx_listeners;
560 		    j++)
561 			if (pfd[j].revents & POLLIN) {
562 				nfds--;
563 				session_accept(pfd[j].fd);
564 			}
565 
566 		for (; nfds > 0 && j < idx_peers; j++)
567 			nfds -= session_dispatch_msg(&pfd[j],
568 			    peer_l[j - idx_listeners]);
569 
570 		for (p = peers; p != NULL; p = p->next)
571 			if (p->rbuf && p->rbuf->wpos)
572 				session_process_msg(p);
573 
574 		for (; nfds > 0 && j < idx_mrts; j++)
575 			if (pfd[j].revents & POLLOUT) {
576 				nfds--;
577 				mrt_write(mrt_l[j - idx_peers]);
578 			}
579 
580 		for (; nfds > 0 && j < i; j++)
581 			nfds -= control_dispatch_msg(&pfd[j], &ctl_cnt);
582 	}
583 
584 	while ((p = peers) != NULL) {
585 		peers = p->next;
586 		session_stop(p, ERR_CEASE_ADMIN_DOWN);
587 		pfkey_remove(p);
588 		free(p);
589 	}
590 
591 	while ((m = LIST_FIRST(&mrthead)) != NULL) {
592 		mrt_clean(m);
593 		LIST_REMOVE(m, entry);
594 		free(m);
595 	}
596 
597 	while ((la = TAILQ_FIRST(conf->listen_addrs)) != NULL) {
598 		TAILQ_REMOVE(conf->listen_addrs, la, entry);
599 		free(la);
600 	}
601 	free(conf->listen_addrs);
602 	free(peer_l);
603 	free(mrt_l);
604 	free(pfd);
605 
606 	msgbuf_write(&ibuf_rde->w);
607 	msgbuf_clear(&ibuf_rde->w);
608 	free(ibuf_rde);
609 	msgbuf_write(&ibuf_main->w);
610 	msgbuf_clear(&ibuf_main->w);
611 	free(ibuf_main);
612 
613 	control_shutdown(csock);
614 	control_shutdown(rcsock);
615 	log_info("session engine exiting");
616 	_exit(0);
617 }
618 
619 void
620 init_conf(struct bgpd_config *c)
621 {
622 	if (!c->holdtime)
623 		c->holdtime = INTERVAL_HOLD;
624 	if (!c->connectretry)
625 		c->connectretry = INTERVAL_CONNECTRETRY;
626 }
627 
628 void
629 init_peer(struct peer *p)
630 {
631 	TAILQ_INIT(&p->timers);
632 	p->fd = p->wbuf.fd = -1;
633 
634 	if (p->conf.if_depend[0])
635 		imsg_compose(ibuf_main, IMSG_IFINFO, 0, 0, -1,
636 		    p->conf.if_depend, sizeof(p->conf.if_depend));
637 	else
638 		p->depend_ok = 1;
639 
640 	peer_cnt++;
641 
642 	change_state(p, STATE_IDLE, EVNT_NONE);
643 	if (p->conf.down)
644 		timer_stop(p, Timer_IdleHold);		/* no autostart */
645 	else
646 		timer_set(p, Timer_IdleHold, 0);	/* start ASAP */
647 
648 	/*
649 	 * on startup, demote if requested.
650 	 * do not handle new peers. they must reach ESTABLISHED beforehands.
651 	 * peers added at runtime have reconf_action set to RECONF_REINIT.
652 	 */
653 	if (p->conf.reconf_action != RECONF_REINIT && p->conf.demote_group[0])
654 		session_demote(p, +1);
655 }
656 
657 void
658 bgp_fsm(struct peer *peer, enum session_events event)
659 {
660 	switch (peer->state) {
661 	case STATE_NONE:
662 		/* nothing */
663 		break;
664 	case STATE_IDLE:
665 		switch (event) {
666 		case EVNT_START:
667 			timer_stop(peer, Timer_Hold);
668 			timer_stop(peer, Timer_Keepalive);
669 			timer_stop(peer, Timer_IdleHold);
670 
671 			/* allocate read buffer */
672 			peer->rbuf = calloc(1, sizeof(struct ibuf_read));
673 			if (peer->rbuf == NULL)
674 				fatal(NULL);
675 
676 			/* init write buffer */
677 			msgbuf_init(&peer->wbuf);
678 
679 			/* init pfkey - remove old if any, load new ones */
680 			pfkey_remove(peer);
681 			if (pfkey_establish(peer) == -1) {
682 				log_peer_warnx(&peer->conf,
683 				    "pfkey setup failed");
684 				return;
685 			}
686 
687 			peer->stats.last_sent_errcode = 0;
688 			peer->stats.last_sent_suberr = 0;
689 
690 			if (!peer->depend_ok)
691 				timer_stop(peer, Timer_ConnectRetry);
692 			else if (peer->passive || peer->conf.passive ||
693 			    peer->conf.template) {
694 				change_state(peer, STATE_ACTIVE, event);
695 				timer_stop(peer, Timer_ConnectRetry);
696 			} else {
697 				change_state(peer, STATE_CONNECT, event);
698 				timer_set(peer, Timer_ConnectRetry,
699 				    conf->connectretry);
700 				session_connect(peer);
701 			}
702 			peer->passive = 0;
703 			break;
704 		default:
705 			/* ignore */
706 			break;
707 		}
708 		break;
709 	case STATE_CONNECT:
710 		switch (event) {
711 		case EVNT_START:
712 			/* ignore */
713 			break;
714 		case EVNT_CON_OPEN:
715 			session_tcp_established(peer);
716 			session_open(peer);
717 			timer_stop(peer, Timer_ConnectRetry);
718 			peer->holdtime = INTERVAL_HOLD_INITIAL;
719 			start_timer_holdtime(peer);
720 			change_state(peer, STATE_OPENSENT, event);
721 			break;
722 		case EVNT_CON_OPENFAIL:
723 			timer_set(peer, Timer_ConnectRetry,
724 			    conf->connectretry);
725 			session_close_connection(peer);
726 			change_state(peer, STATE_ACTIVE, event);
727 			break;
728 		case EVNT_TIMER_CONNRETRY:
729 			timer_set(peer, Timer_ConnectRetry,
730 			    conf->connectretry);
731 			session_connect(peer);
732 			break;
733 		default:
734 			change_state(peer, STATE_IDLE, event);
735 			break;
736 		}
737 		break;
738 	case STATE_ACTIVE:
739 		switch (event) {
740 		case EVNT_START:
741 			/* ignore */
742 			break;
743 		case EVNT_CON_OPEN:
744 			session_tcp_established(peer);
745 			session_open(peer);
746 			timer_stop(peer, Timer_ConnectRetry);
747 			peer->holdtime = INTERVAL_HOLD_INITIAL;
748 			start_timer_holdtime(peer);
749 			change_state(peer, STATE_OPENSENT, event);
750 			break;
751 		case EVNT_CON_OPENFAIL:
752 			timer_set(peer, Timer_ConnectRetry,
753 			    conf->connectretry);
754 			session_close_connection(peer);
755 			change_state(peer, STATE_ACTIVE, event);
756 			break;
757 		case EVNT_TIMER_CONNRETRY:
758 			timer_set(peer, Timer_ConnectRetry,
759 			    peer->holdtime);
760 			change_state(peer, STATE_CONNECT, event);
761 			session_connect(peer);
762 			break;
763 		default:
764 			change_state(peer, STATE_IDLE, event);
765 			break;
766 		}
767 		break;
768 	case STATE_OPENSENT:
769 		switch (event) {
770 		case EVNT_START:
771 			/* ignore */
772 			break;
773 		case EVNT_STOP:
774 			change_state(peer, STATE_IDLE, event);
775 			break;
776 		case EVNT_CON_CLOSED:
777 			session_close_connection(peer);
778 			timer_set(peer, Timer_ConnectRetry,
779 			    conf->connectretry);
780 			change_state(peer, STATE_ACTIVE, event);
781 			break;
782 		case EVNT_CON_FATAL:
783 			change_state(peer, STATE_IDLE, event);
784 			break;
785 		case EVNT_TIMER_HOLDTIME:
786 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
787 			    0, NULL, 0);
788 			change_state(peer, STATE_IDLE, event);
789 			break;
790 		case EVNT_RCVD_OPEN:
791 			/* parse_open calls change_state itself on failure */
792 			if (parse_open(peer))
793 				break;
794 			session_keepalive(peer);
795 			change_state(peer, STATE_OPENCONFIRM, event);
796 			break;
797 		case EVNT_RCVD_NOTIFICATION:
798 			if (parse_notification(peer)) {
799 				change_state(peer, STATE_IDLE, event);
800 				/* don't punish, capa negotiation */
801 				timer_set(peer, Timer_IdleHold, 0);
802 				peer->IdleHoldTime /= 2;
803 			} else
804 				change_state(peer, STATE_IDLE, event);
805 			break;
806 		default:
807 			session_notification(peer,
808 			    ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL, 0);
809 			change_state(peer, STATE_IDLE, event);
810 			break;
811 		}
812 		break;
813 	case STATE_OPENCONFIRM:
814 		switch (event) {
815 		case EVNT_START:
816 			/* ignore */
817 			break;
818 		case EVNT_STOP:
819 			change_state(peer, STATE_IDLE, event);
820 			break;
821 		case EVNT_CON_CLOSED:
822 		case EVNT_CON_FATAL:
823 			change_state(peer, STATE_IDLE, event);
824 			break;
825 		case EVNT_TIMER_HOLDTIME:
826 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
827 			    0, NULL, 0);
828 			change_state(peer, STATE_IDLE, event);
829 			break;
830 		case EVNT_TIMER_KEEPALIVE:
831 			session_keepalive(peer);
832 			break;
833 		case EVNT_RCVD_KEEPALIVE:
834 			start_timer_holdtime(peer);
835 			change_state(peer, STATE_ESTABLISHED, event);
836 			break;
837 		case EVNT_RCVD_NOTIFICATION:
838 			parse_notification(peer);
839 			change_state(peer, STATE_IDLE, event);
840 			break;
841 		default:
842 			session_notification(peer,
843 			    ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL, 0);
844 			change_state(peer, STATE_IDLE, event);
845 			break;
846 		}
847 		break;
848 	case STATE_ESTABLISHED:
849 		switch (event) {
850 		case EVNT_START:
851 			/* ignore */
852 			break;
853 		case EVNT_STOP:
854 			change_state(peer, STATE_IDLE, event);
855 			break;
856 		case EVNT_CON_CLOSED:
857 		case EVNT_CON_FATAL:
858 			change_state(peer, STATE_IDLE, event);
859 			break;
860 		case EVNT_TIMER_HOLDTIME:
861 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
862 			    0, NULL, 0);
863 			change_state(peer, STATE_IDLE, event);
864 			break;
865 		case EVNT_TIMER_KEEPALIVE:
866 			session_keepalive(peer);
867 			break;
868 		case EVNT_RCVD_KEEPALIVE:
869 			start_timer_holdtime(peer);
870 			break;
871 		case EVNT_RCVD_UPDATE:
872 			start_timer_holdtime(peer);
873 			if (parse_update(peer))
874 				change_state(peer, STATE_IDLE, event);
875 			else
876 				start_timer_holdtime(peer);
877 			break;
878 		case EVNT_RCVD_NOTIFICATION:
879 			parse_notification(peer);
880 			change_state(peer, STATE_IDLE, event);
881 			break;
882 		default:
883 			session_notification(peer,
884 			    ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL, 0);
885 			change_state(peer, STATE_IDLE, event);
886 			break;
887 		}
888 		break;
889 	}
890 }
891 
892 void
893 start_timer_holdtime(struct peer *peer)
894 {
895 	if (peer->holdtime > 0)
896 		timer_set(peer, Timer_Hold, peer->holdtime);
897 	else
898 		timer_stop(peer, Timer_Hold);
899 }
900 
901 void
902 start_timer_keepalive(struct peer *peer)
903 {
904 	if (peer->holdtime > 0)
905 		timer_set(peer, Timer_Keepalive, peer->holdtime / 3);
906 	else
907 		timer_stop(peer, Timer_Keepalive);
908 }
909 
910 void
911 session_close_connection(struct peer *peer)
912 {
913 	if (peer->fd != -1) {
914 		close(peer->fd);
915 		pauseaccept = 0;
916 	}
917 	peer->fd = peer->wbuf.fd = -1;
918 }
919 
920 void
921 change_state(struct peer *peer, enum session_state state,
922     enum session_events event)
923 {
924 	struct mrt	*mrt;
925 
926 	switch (state) {
927 	case STATE_IDLE:
928 		/* carp demotion first. new peers handled in init_peer */
929 		if (peer->state == STATE_ESTABLISHED &&
930 		    peer->conf.demote_group[0] && !peer->demoted)
931 			session_demote(peer, +1);
932 
933 		/*
934 		 * try to write out what's buffered (maybe a notification),
935 		 * don't bother if it fails
936 		 */
937 		if (peer->state >= STATE_OPENSENT && peer->wbuf.queued)
938 			msgbuf_write(&peer->wbuf);
939 
940 		/*
941 		 * we must start the timer for the next EVNT_START
942 		 * if we are coming here due to an error and the
943 		 * session was not established successfully before, the
944 		 * starttimerinterval needs to be exponentially increased
945 		 */
946 		if (peer->IdleHoldTime == 0)
947 			peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL;
948 		peer->holdtime = INTERVAL_HOLD_INITIAL;
949 		timer_stop(peer, Timer_ConnectRetry);
950 		timer_stop(peer, Timer_Keepalive);
951 		timer_stop(peer, Timer_Hold);
952 		timer_stop(peer, Timer_IdleHold);
953 		timer_stop(peer, Timer_IdleHoldReset);
954 		session_close_connection(peer);
955 		msgbuf_clear(&peer->wbuf);
956 		free(peer->rbuf);
957 		peer->rbuf = NULL;
958 		bzero(&peer->capa.peer, sizeof(peer->capa.peer));
959 
960 		if (event != EVNT_STOP) {
961 			timer_set(peer, Timer_IdleHold, peer->IdleHoldTime);
962 			if (event != EVNT_NONE &&
963 			    peer->IdleHoldTime < MAX_IDLE_HOLD/2)
964 				peer->IdleHoldTime *= 2;
965 		}
966 		if (peer->state == STATE_ESTABLISHED) {
967 			if (peer->capa.neg.grestart.restart == 2 &&
968 			    (event == EVNT_CON_CLOSED ||
969 			    event == EVNT_CON_FATAL)) {
970 				/* don't punish graceful restart */
971 				timer_set(peer, Timer_IdleHold, 0);
972 				peer->IdleHoldTime /= 2;
973 				session_graceful_restart(peer);
974 			} else
975 				session_down(peer);
976 		}
977 		if (peer->state == STATE_NONE ||
978 		    peer->state == STATE_ESTABLISHED) {
979 			/* initialize capability negotiation structures */
980 			memcpy(&peer->capa.ann, &peer->conf.capabilities,
981 			    sizeof(peer->capa.ann));
982 			if (!peer->conf.announce_capa)
983 				session_capa_ann_none(peer);
984 		}
985 		break;
986 	case STATE_CONNECT:
987 		if (peer->state == STATE_ESTABLISHED &&
988 		    peer->capa.neg.grestart.restart == 2) {
989 			/* do the graceful restart dance */
990 			session_graceful_restart(peer);
991 			peer->holdtime = INTERVAL_HOLD_INITIAL;
992 			timer_stop(peer, Timer_ConnectRetry);
993 			timer_stop(peer, Timer_Keepalive);
994 			timer_stop(peer, Timer_Hold);
995 			timer_stop(peer, Timer_IdleHold);
996 			timer_stop(peer, Timer_IdleHoldReset);
997 			session_close_connection(peer);
998 			msgbuf_clear(&peer->wbuf);
999 			bzero(&peer->capa.peer, sizeof(peer->capa.peer));
1000 		}
1001 		break;
1002 	case STATE_ACTIVE:
1003 		break;
1004 	case STATE_OPENSENT:
1005 		break;
1006 	case STATE_OPENCONFIRM:
1007 		break;
1008 	case STATE_ESTABLISHED:
1009 		timer_set(peer, Timer_IdleHoldReset, peer->IdleHoldTime);
1010 		if (peer->demoted)
1011 			timer_set(peer, Timer_CarpUndemote,
1012 			    INTERVAL_HOLD_DEMOTED);
1013 		session_up(peer);
1014 		break;
1015 	default:		/* something seriously fucked */
1016 		break;
1017 	}
1018 
1019 	log_statechange(peer, state, event);
1020 	LIST_FOREACH(mrt, &mrthead, entry) {
1021 		if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT))
1022 			continue;
1023 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1024 		    mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
1025 		    mrt->group_id == peer->conf.groupid))
1026 			mrt_dump_state(mrt, peer->state, state, peer);
1027 	}
1028 	peer->prev_state = peer->state;
1029 	peer->state = state;
1030 }
1031 
1032 void
1033 session_accept(int listenfd)
1034 {
1035 	int			 connfd;
1036 	int			 opt;
1037 	socklen_t		 len;
1038 	struct sockaddr_storage	 cliaddr;
1039 	struct peer		*p = NULL;
1040 
1041 	len = sizeof(cliaddr);
1042 	if ((connfd = accept(listenfd,
1043 	    (struct sockaddr *)&cliaddr, &len)) == -1) {
1044 		if (errno == ENFILE || errno == EMFILE)
1045 			pauseaccept = getmonotime();
1046 		else if (errno != EWOULDBLOCK && errno != EINTR &&
1047 		    errno != ECONNABORTED)
1048 			log_warn("accept");
1049 		return;
1050 	}
1051 
1052 	p = getpeerbyip((struct sockaddr *)&cliaddr);
1053 
1054 	if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) {
1055 		if (timer_running(p, Timer_IdleHold, NULL)) {
1056 			/* fast reconnect after clear */
1057 			p->passive = 1;
1058 			bgp_fsm(p, EVNT_START);
1059 		}
1060 	}
1061 
1062 	if (p != NULL &&
1063 	    (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) {
1064 		if (p->fd != -1) {
1065 			if (p->state == STATE_CONNECT)
1066 				session_close_connection(p);
1067 			else {
1068 				close(connfd);
1069 				return;
1070 			}
1071 		}
1072 
1073 open:
1074 		if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1075 			log_peer_warnx(&p->conf,
1076 			    "ipsec or md5sig configured but not available");
1077 			close(connfd);
1078 			return;
1079 		}
1080 
1081 		if (p->conf.auth.method == AUTH_MD5SIG) {
1082 			if (sysdep.no_md5sig) {
1083 				log_peer_warnx(&p->conf,
1084 				    "md5sig configured but not available");
1085 				close(connfd);
1086 				return;
1087 			}
1088 			len = sizeof(opt);
1089 			if (getsockopt(connfd, IPPROTO_TCP, TCP_MD5SIG,
1090 			    &opt, &len) == -1)
1091 				fatal("getsockopt TCP_MD5SIG");
1092 			if (!opt) {	/* non-md5'd connection! */
1093 				log_peer_warnx(&p->conf,
1094 				    "connection attempt without md5 signature");
1095 				close(connfd);
1096 				return;
1097 			}
1098 		}
1099 		p->fd = p->wbuf.fd = connfd;
1100 		if (session_setup_socket(p)) {
1101 			close(connfd);
1102 			return;
1103 		}
1104 		session_socket_blockmode(connfd, BM_NONBLOCK);
1105 		bgp_fsm(p, EVNT_CON_OPEN);
1106 		return;
1107 	} else if (p != NULL && p->state == STATE_ESTABLISHED &&
1108 	    p->capa.neg.grestart.restart == 2) {
1109 		/* first do the graceful restart dance */
1110 		change_state(p, STATE_CONNECT, EVNT_CON_CLOSED);
1111 		/* then do part of the open dance */
1112 		goto open;
1113 	} else {
1114 		log_conn_attempt(p, (struct sockaddr *)&cliaddr);
1115 		close(connfd);
1116 	}
1117 }
1118 
1119 int
1120 session_connect(struct peer *peer)
1121 {
1122 	int			 opt = 1;
1123 	struct sockaddr		*sa;
1124 
1125 	/*
1126 	 * we do not need the overcomplicated collision detection RFC 1771
1127 	 * describes; we simply make sure there is only ever one concurrent
1128 	 * tcp connection per peer.
1129 	 */
1130 	if (peer->fd != -1)
1131 		return (-1);
1132 
1133 	if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid), SOCK_STREAM,
1134 	    IPPROTO_TCP)) == -1) {
1135 		log_peer_warn(&peer->conf, "session_connect socket");
1136 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1137 		return (-1);
1138 	}
1139 
1140 	if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1141 		log_peer_warnx(&peer->conf,
1142 		    "ipsec or md5sig configured but not available");
1143 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1144 		return (-1);
1145 	}
1146 
1147 	if (peer->conf.auth.method == AUTH_MD5SIG) {
1148 		if (sysdep.no_md5sig) {
1149 			log_peer_warnx(&peer->conf,
1150 			    "md5sig configured but not available");
1151 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1152 			return (-1);
1153 		}
1154 		if (setsockopt(peer->fd, IPPROTO_TCP, TCP_MD5SIG,
1155 		    &opt, sizeof(opt)) == -1) {
1156 			log_peer_warn(&peer->conf, "setsockopt md5sig");
1157 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1158 			return (-1);
1159 		}
1160 	}
1161 	peer->wbuf.fd = peer->fd;
1162 
1163 	/* if update source is set we need to bind() */
1164 	if ((sa = addr2sa(&peer->conf.local_addr, 0)) != NULL) {
1165 		if (bind(peer->fd, sa, sa->sa_len) == -1) {
1166 			log_peer_warn(&peer->conf, "session_connect bind");
1167 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1168 			return (-1);
1169 		}
1170 	}
1171 
1172 	if (session_setup_socket(peer)) {
1173 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1174 		return (-1);
1175 	}
1176 
1177 	session_socket_blockmode(peer->fd, BM_NONBLOCK);
1178 
1179 	sa = addr2sa(&peer->conf.remote_addr, BGP_PORT);
1180 	if (connect(peer->fd, sa, sa->sa_len) == -1) {
1181 		if (errno != EINPROGRESS) {
1182 			if (errno != peer->lasterr)
1183 				log_peer_warn(&peer->conf, "connect");
1184 			peer->lasterr = errno;
1185 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1186 			return (-1);
1187 		}
1188 	} else
1189 		bgp_fsm(peer, EVNT_CON_OPEN);
1190 
1191 	return (0);
1192 }
1193 
1194 int
1195 session_setup_socket(struct peer *p)
1196 {
1197 	int	ttl = p->conf.distance;
1198 	int	pre = IPTOS_PREC_INTERNETCONTROL;
1199 	int	nodelay = 1;
1200 	int	bsize;
1201 
1202 	switch (p->conf.remote_addr.aid) {
1203 	case AID_INET:
1204 		/* set precedence, see RFC 1771 appendix 5 */
1205 		if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) ==
1206 		    -1) {
1207 			log_peer_warn(&p->conf,
1208 			    "session_setup_socket setsockopt TOS");
1209 			return (-1);
1210 		}
1211 
1212 		if (p->conf.ebgp) {
1213 			/* set TTL to foreign router's distance
1214 			   1=direct n=multihop with ttlsec, we always use 255 */
1215 			if (p->conf.ttlsec) {
1216 				ttl = 256 - p->conf.distance;
1217 				if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL,
1218 				    &ttl, sizeof(ttl)) == -1) {
1219 					log_peer_warn(&p->conf,
1220 					    "session_setup_socket: "
1221 					    "setsockopt MINTTL");
1222 					return (-1);
1223 				}
1224 				ttl = 255;
1225 			}
1226 
1227 			if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl,
1228 			    sizeof(ttl)) == -1) {
1229 				log_peer_warn(&p->conf,
1230 				    "session_setup_socket setsockopt TTL");
1231 				return (-1);
1232 			}
1233 		}
1234 		break;
1235 	case AID_INET6:
1236 		if (p->conf.ebgp) {
1237 			/* set hoplimit to foreign router's distance
1238 			   1=direct n=multihop with ttlsec, we always use 255 */
1239 			if (p->conf.ttlsec) {
1240 			/*
1241 			 * XXX Kernel has no ip6 equivalent of MINTTL yet so
1242 			 * we can't check incoming packets, but we can at least
1243 			 * set the outgoing TTL to allow sessions configured
1244 			 * with ttl-security to come up.
1245 			 */
1246 				ttl = 255;
1247 			}
1248 			if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
1249 			    &ttl, sizeof(ttl)) == -1) {
1250 				log_peer_warn(&p->conf,
1251 				    "session_setup_socket setsockopt hoplimit");
1252 				return (-1);
1253 			}
1254 		}
1255 		break;
1256 	}
1257 
1258 	/* set TCP_NODELAY */
1259 	if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay,
1260 	    sizeof(nodelay)) == -1) {
1261 		log_peer_warn(&p->conf,
1262 		    "session_setup_socket setsockopt TCP_NODELAY");
1263 		return (-1);
1264 	}
1265 
1266 	/* only increase bufsize (and thus window) if md5 or ipsec is in use */
1267 	if (p->conf.auth.method != AUTH_NONE) {
1268 		/* try to increase bufsize. no biggie if it fails */
1269 		bsize = 65535;
1270 		while (bsize > 8192 &&
1271 		    setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize,
1272 		    sizeof(bsize)) == -1 && errno != EINVAL)
1273 			bsize /= 2;
1274 		bsize = 65535;
1275 		while (bsize > 8192 &&
1276 		    setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize,
1277 		    sizeof(bsize)) == -1 && errno != EINVAL)
1278 			bsize /= 2;
1279 	}
1280 
1281 	return (0);
1282 }
1283 
1284 void
1285 session_socket_blockmode(int fd, enum blockmodes bm)
1286 {
1287 	int	flags;
1288 
1289 	if ((flags = fcntl(fd, F_GETFL, 0)) == -1)
1290 		fatal("fcntl F_GETFL");
1291 
1292 	if (bm == BM_NONBLOCK)
1293 		flags |= O_NONBLOCK;
1294 	else
1295 		flags &= ~O_NONBLOCK;
1296 
1297 	if ((flags = fcntl(fd, F_SETFL, flags)) == -1)
1298 		fatal("fcntl F_SETFL");
1299 }
1300 
1301 void
1302 session_tcp_established(struct peer *peer)
1303 {
1304 	socklen_t	len;
1305 
1306 	len = sizeof(peer->sa_local);
1307 	if (getsockname(peer->fd, (struct sockaddr *)&peer->sa_local,
1308 	    &len) == -1)
1309 		log_warn("getsockname");
1310 	len = sizeof(peer->sa_remote);
1311 	if (getpeername(peer->fd, (struct sockaddr *)&peer->sa_remote,
1312 	    &len) == -1)
1313 		log_warn("getpeername");
1314 }
1315 
1316 void
1317 session_capa_ann_none(struct peer *peer)
1318 {
1319 	bzero(&peer->capa.ann, sizeof(peer->capa.ann));
1320 }
1321 
/*
 * Append a capability header (one byte code, one byte length) to opb.
 * Returns the sum of the ibuf_add() results; callers treat any non-zero
 * value as failure.
 */
int
session_capa_add(struct ibuf *opb, u_int8_t capa_code, u_int8_t capa_len)
{
	int	failed;

	failed = ibuf_add(opb, &capa_code, sizeof(capa_code));
	failed += ibuf_add(opb, &capa_len, sizeof(capa_len));

	return (failed);
}
1331 
/*
 * Append the 4 byte multiprotocol capability value for aid to buf:
 * AFI in network byte order, one zero byte, SAFI.
 * An aid that aid2afi() cannot translate is fatal.  Returns the summed
 * ibuf_add() results (non-zero means something could not be added).
 */
int
session_capa_add_mp(struct ibuf *buf, u_int8_t aid)
{
	u_int16_t	 nafi;
	u_int8_t	 safi;
	u_int8_t	 zero = 0;
	int		 failed;

	if (aid2afi(aid, &nafi, &safi) == -1)
		fatalx("session_capa_add_mp: bad afi/safi pair");
	nafi = htons(nafi);

	failed = ibuf_add(buf, &nafi, sizeof(nafi));
	failed += ibuf_add(buf, &zero, sizeof(zero));
	failed += ibuf_add(buf, &safi, sizeof(safi));

	return (failed);
}
1348 
1349 int
1350 session_capa_add_gr(struct peer *p, struct ibuf *b, u_int8_t aid)
1351 {
1352 	u_int		errs = 0;
1353 	u_int16_t	afi;
1354 	u_int8_t	flags, safi;
1355 
1356 	if (aid2afi(aid, &afi, &safi)) {
1357 		log_warn("session_capa_add_gr: bad AID");
1358 		return (1);
1359 	}
1360 	if (p->capa.neg.grestart.flags[aid] & CAPA_GR_RESTARTING)
1361 		flags = CAPA_GR_F_FLAG;
1362 	else
1363 		flags = 0;
1364 
1365 	afi = htons(afi);
1366 	errs += ibuf_add(b, &afi, sizeof(afi));
1367 	errs += ibuf_add(b, &safi, sizeof(safi));
1368 	errs += ibuf_add(b, &flags, sizeof(flags));
1369 
1370 	return (errs);
1371 }
1372 
1373 struct bgp_msg *
1374 session_newmsg(enum msg_type msgtype, u_int16_t len)
1375 {
1376 	struct bgp_msg		*msg;
1377 	struct msg_header	 hdr;
1378 	struct ibuf		*buf;
1379 	int			 errs = 0;
1380 
1381 	memset(&hdr.marker, 0xff, sizeof(hdr.marker));
1382 	hdr.len = htons(len);
1383 	hdr.type = msgtype;
1384 
1385 	if ((buf = ibuf_open(len)) == NULL)
1386 		return (NULL);
1387 
1388 	errs += ibuf_add(buf, &hdr.marker, sizeof(hdr.marker));
1389 	errs += ibuf_add(buf, &hdr.len, sizeof(hdr.len));
1390 	errs += ibuf_add(buf, &hdr.type, sizeof(hdr.type));
1391 
1392 	if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) {
1393 		ibuf_free(buf);
1394 		return (NULL);
1395 	}
1396 
1397 	msg->buf = buf;
1398 	msg->type = msgtype;
1399 	msg->len = len;
1400 
1401 	return (msg);
1402 }
1403 
1404 int
1405 session_sendmsg(struct bgp_msg *msg, struct peer *p)
1406 {
1407 	struct mrt		*mrt;
1408 
1409 	LIST_FOREACH(mrt, &mrthead, entry) {
1410 		if (!(mrt->type == MRT_ALL_OUT || (msg->type == UPDATE &&
1411 		    mrt->type == MRT_UPDATE_OUT)))
1412 			continue;
1413 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1414 		    mrt->peer_id == p->conf.id || (mrt->group_id == 0 &&
1415 		    mrt->group_id == p->conf.groupid))
1416 			mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p);
1417 	}
1418 
1419 	ibuf_close(&p->wbuf, msg->buf);
1420 	free(msg);
1421 	return (0);
1422 }
1423 
1424 void
1425 session_open(struct peer *p)
1426 {
1427 	struct bgp_msg		*buf;
1428 	struct ibuf		*opb;
1429 	struct msg_open		 msg;
1430 	u_int16_t		 len;
1431 	u_int8_t		 i, op_type, optparamlen = 0;
1432 	int			 errs = 0;
1433 	int			 mpcapa = 0;
1434 
1435 
1436 	if ((opb = ibuf_dynamic(0, UCHAR_MAX - sizeof(op_type) -
1437 	    sizeof(optparamlen))) == NULL) {
1438 		bgp_fsm(p, EVNT_CON_FATAL);
1439 		return;
1440 	}
1441 
1442 	/* multiprotocol extensions, RFC 4760 */
1443 	for (i = 0; i < AID_MAX; i++)
1444 		if (p->capa.ann.mp[i]) {	/* 4 bytes data */
1445 			errs += session_capa_add(opb, CAPA_MP, 4);
1446 			errs += session_capa_add_mp(opb, i);
1447 			mpcapa++;
1448 		}
1449 
1450 	/* route refresh, RFC 2918 */
1451 	if (p->capa.ann.refresh)	/* no data */
1452 		errs += session_capa_add(opb, CAPA_REFRESH, 0);
1453 
1454 	/* graceful restart and End-of-RIB marker, RFC 4724 */
1455 	if (p->capa.ann.grestart.restart) {
1456 		int		rst = 0;
1457 		u_int16_t	hdr;
1458 		u_int8_t	grlen;
1459 
1460 		if (mpcapa) {
1461 			grlen = 2 + 4 * mpcapa;
1462 			for (i = 0; i < AID_MAX; i++) {
1463 				if (p->capa.neg.grestart.flags[i] &
1464 				    CAPA_GR_RESTARTING)
1465 					rst++;
1466 			}
1467 		} else {	/* AID_INET */
1468 			grlen = 2 + 4;
1469 			if (p->capa.neg.grestart.flags[AID_INET] &
1470 			    CAPA_GR_RESTARTING)
1471 				rst++;
1472 		}
1473 
1474 		hdr = conf->holdtime;		/* default timeout */
1475 		/* if client does graceful restart don't set R flag */
1476 		if (!rst)
1477 			hdr |= CAPA_GR_R_FLAG;
1478 		hdr = htons(hdr);
1479 
1480 		errs += session_capa_add(opb, CAPA_RESTART, grlen);
1481 		errs += ibuf_add(opb, &hdr, sizeof(hdr));
1482 
1483 		if (mpcapa) {
1484 			for (i = 0; i < AID_MAX; i++) {
1485 				if (p->capa.ann.mp[i]) {
1486 					errs += session_capa_add_gr(p, opb, i);
1487 				}
1488 			}
1489 		} else {	/* AID_INET */
1490 			errs += session_capa_add_gr(p, opb, AID_INET);
1491 		}
1492 	}
1493 
1494 	/* 4-bytes AS numbers, draft-ietf-idr-as4bytes-13 */
1495 	if (p->capa.ann.as4byte) {	/* 4 bytes data */
1496 		u_int32_t	nas;
1497 
1498 		nas = htonl(conf->as);
1499 		errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(nas));
1500 		errs += ibuf_add(opb, &nas, sizeof(nas));
1501 	}
1502 
1503 	if (ibuf_size(opb))
1504 		optparamlen = ibuf_size(opb) + sizeof(op_type) +
1505 		    sizeof(optparamlen);
1506 
1507 	len = MSGSIZE_OPEN_MIN + optparamlen;
1508 	if (errs || (buf = session_newmsg(OPEN, len)) == NULL) {
1509 		ibuf_free(opb);
1510 		bgp_fsm(p, EVNT_CON_FATAL);
1511 		return;
1512 	}
1513 
1514 	msg.version = 4;
1515 	msg.myas = htons(conf->short_as);
1516 	if (p->conf.holdtime)
1517 		msg.holdtime = htons(p->conf.holdtime);
1518 	else
1519 		msg.holdtime = htons(conf->holdtime);
1520 	msg.bgpid = conf->bgpid;	/* is already in network byte order */
1521 	msg.optparamlen = optparamlen;
1522 
1523 	errs += ibuf_add(buf->buf, &msg.version, sizeof(msg.version));
1524 	errs += ibuf_add(buf->buf, &msg.myas, sizeof(msg.myas));
1525 	errs += ibuf_add(buf->buf, &msg.holdtime, sizeof(msg.holdtime));
1526 	errs += ibuf_add(buf->buf, &msg.bgpid, sizeof(msg.bgpid));
1527 	errs += ibuf_add(buf->buf, &msg.optparamlen, sizeof(msg.optparamlen));
1528 
1529 	if (optparamlen) {
1530 		op_type = OPT_PARAM_CAPABILITIES;
1531 		optparamlen = ibuf_size(opb);
1532 		errs += ibuf_add(buf->buf, &op_type, sizeof(op_type));
1533 		errs += ibuf_add(buf->buf, &optparamlen, sizeof(optparamlen));
1534 		errs += ibuf_add(buf->buf, opb->buf, ibuf_size(opb));
1535 	}
1536 
1537 	ibuf_free(opb);
1538 
1539 	if (errs) {
1540 		ibuf_free(buf->buf);
1541 		free(buf);
1542 		bgp_fsm(p, EVNT_CON_FATAL);
1543 		return;
1544 	}
1545 
1546 	if (session_sendmsg(buf, p) == -1) {
1547 		bgp_fsm(p, EVNT_CON_FATAL);
1548 		return;
1549 	}
1550 
1551 	p->stats.msg_sent_open++;
1552 }
1553 
1554 void
1555 session_keepalive(struct peer *p)
1556 {
1557 	struct bgp_msg		*buf;
1558 
1559 	if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL ||
1560 	    session_sendmsg(buf, p) == -1) {
1561 		bgp_fsm(p, EVNT_CON_FATAL);
1562 		return;
1563 	}
1564 
1565 	start_timer_keepalive(p);
1566 	p->stats.msg_sent_keepalive++;
1567 }
1568 
1569 void
1570 session_update(u_int32_t peerid, void *data, size_t datalen)
1571 {
1572 	struct peer		*p;
1573 	struct bgp_msg		*buf;
1574 
1575 	if ((p = getpeerbyid(peerid)) == NULL) {
1576 		log_warnx("no such peer: id=%u", peerid);
1577 		return;
1578 	}
1579 
1580 	if (p->state != STATE_ESTABLISHED)
1581 		return;
1582 
1583 	if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + datalen)) == NULL) {
1584 		bgp_fsm(p, EVNT_CON_FATAL);
1585 		return;
1586 	}
1587 
1588 	if (ibuf_add(buf->buf, data, datalen)) {
1589 		ibuf_free(buf->buf);
1590 		free(buf);
1591 		bgp_fsm(p, EVNT_CON_FATAL);
1592 		return;
1593 	}
1594 
1595 	if (session_sendmsg(buf, p) == -1) {
1596 		bgp_fsm(p, EVNT_CON_FATAL);
1597 		return;
1598 	}
1599 
1600 	start_timer_keepalive(p);
1601 	p->stats.msg_sent_update++;
1602 }
1603 
1604 void
1605 session_notification(struct peer *p, u_int8_t errcode, u_int8_t subcode,
1606     void *data, ssize_t datalen)
1607 {
1608 	struct bgp_msg		*buf;
1609 	int			 errs = 0;
1610 
1611 	if (p->stats.last_sent_errcode)	/* some notification already sent */
1612 		return;
1613 
1614 	log_notification(p, errcode, subcode, data, datalen, "sending");
1615 
1616 	if ((buf = session_newmsg(NOTIFICATION,
1617 	    MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) {
1618 		bgp_fsm(p, EVNT_CON_FATAL);
1619 		return;
1620 	}
1621 
1622 	errs += ibuf_add(buf->buf, &errcode, sizeof(errcode));
1623 	errs += ibuf_add(buf->buf, &subcode, sizeof(subcode));
1624 
1625 	if (datalen > 0)
1626 		errs += ibuf_add(buf->buf, data, datalen);
1627 
1628 	if (errs) {
1629 		ibuf_free(buf->buf);
1630 		free(buf);
1631 		bgp_fsm(p, EVNT_CON_FATAL);
1632 		return;
1633 	}
1634 
1635 	if (session_sendmsg(buf, p) == -1) {
1636 		bgp_fsm(p, EVNT_CON_FATAL);
1637 		return;
1638 	}
1639 
1640 	p->stats.msg_sent_notification++;
1641 	p->stats.last_sent_errcode = errcode;
1642 	p->stats.last_sent_suberr = subcode;
1643 }
1644 
1645 int
1646 session_neighbor_rrefresh(struct peer *p)
1647 {
1648 	u_int8_t	i;
1649 
1650 	if (!p->capa.peer.refresh)
1651 		return (-1);
1652 
1653 	for (i = 0; i < AID_MAX; i++) {
1654 		if (p->capa.peer.mp[i] != 0)
1655 			session_rrefresh(p, i);
1656 	}
1657 
1658 	return (0);
1659 }
1660 
1661 void
1662 session_rrefresh(struct peer *p, u_int8_t aid)
1663 {
1664 	struct bgp_msg		*buf;
1665 	int			 errs = 0;
1666 	u_int16_t		 afi;
1667 	u_int8_t		 safi, null8 = 0;
1668 
1669 	if (aid2afi(aid, &afi, &safi) == -1)
1670 		fatalx("session_rrefresh: bad afi/safi pair");
1671 
1672 	if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) {
1673 		bgp_fsm(p, EVNT_CON_FATAL);
1674 		return;
1675 	}
1676 
1677 	afi = htons(afi);
1678 	errs += ibuf_add(buf->buf, &afi, sizeof(afi));
1679 	errs += ibuf_add(buf->buf, &null8, sizeof(null8));
1680 	errs += ibuf_add(buf->buf, &safi, sizeof(safi));
1681 
1682 	if (errs) {
1683 		ibuf_free(buf->buf);
1684 		free(buf);
1685 		bgp_fsm(p, EVNT_CON_FATAL);
1686 		return;
1687 	}
1688 
1689 	if (session_sendmsg(buf, p) == -1) {
1690 		bgp_fsm(p, EVNT_CON_FATAL);
1691 		return;
1692 	}
1693 
1694 	p->stats.msg_sent_rrefresh++;
1695 }
1696 
1697 int
1698 session_graceful_restart(struct peer *p)
1699 {
1700 	u_int8_t	i;
1701 
1702 	timer_set(p, Timer_RestartTimeout, p->capa.neg.grestart.timeout);
1703 
1704 	for (i = 0; i < AID_MAX; i++) {
1705 		if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) {
1706 			if (imsg_compose(ibuf_rde, IMSG_SESSION_STALE,
1707 			    p->conf.id, 0, -1, &i, sizeof(i)) == -1)
1708 				return (-1);
1709 			log_peer_warnx(&p->conf,
1710 			    "graceful restart of %s, keeping routes",
1711 			    aid2str(i));
1712 			p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING;
1713 		} else if (p->capa.neg.mp[i]) {
1714 			if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH,
1715 			    p->conf.id, 0, -1, &i, sizeof(i)) == -1)
1716 				return (-1);
1717 			log_peer_warnx(&p->conf,
1718 			    "graceful restart of %s, flushing routes",
1719 			    aid2str(i));
1720 		}
1721 	}
1722 	return (0);
1723 }
1724 
1725 int
1726 session_graceful_stop(struct peer *p)
1727 {
1728 	u_int8_t	i;
1729 
1730 	for (i = 0; i < AID_MAX; i++) {
1731 		/*
1732 		 * Only flush if the peer is restarting and the timeout fired.
1733 		 * In all other cases the session was already flushed when the
1734 		 * session went down or when the new open message was parsed.
1735 		 */
1736 		if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) {
1737 			log_peer_warnx(&p->conf, "graceful restart of %s, "
1738 			    "time-out, flushing", aid2str(i));
1739 			if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH,
1740 			    p->conf.id, 0, -1, &i, sizeof(i)) == -1)
1741 				return (-1);
1742 		}
1743 		p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING;
1744 	}
1745 	return (0);
1746 }
1747 
1748 int
1749 session_dispatch_msg(struct pollfd *pfd, struct peer *p)
1750 {
1751 	ssize_t		n;
1752 	socklen_t	len;
1753 	int		error;
1754 
1755 	if (p->state == STATE_CONNECT) {
1756 		if (pfd->revents & POLLOUT) {
1757 			if (pfd->revents & POLLIN) {
1758 				/* error occurred */
1759 				len = sizeof(error);
1760 				if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR,
1761 				    &error, &len) == -1 || error) {
1762 					if (error)
1763 						errno = error;
1764 					if (errno != p->lasterr) {
1765 						log_peer_warn(&p->conf,
1766 						    "socket error");
1767 						p->lasterr = errno;
1768 					}
1769 					bgp_fsm(p, EVNT_CON_OPENFAIL);
1770 					return (1);
1771 				}
1772 			}
1773 			bgp_fsm(p, EVNT_CON_OPEN);
1774 			return (1);
1775 		}
1776 		if (pfd->revents & POLLHUP) {
1777 			bgp_fsm(p, EVNT_CON_OPENFAIL);
1778 			return (1);
1779 		}
1780 		if (pfd->revents & (POLLERR|POLLNVAL)) {
1781 			bgp_fsm(p, EVNT_CON_FATAL);
1782 			return (1);
1783 		}
1784 		return (0);
1785 	}
1786 
1787 	if (pfd->revents & POLLHUP) {
1788 		bgp_fsm(p, EVNT_CON_CLOSED);
1789 		return (1);
1790 	}
1791 	if (pfd->revents & (POLLERR|POLLNVAL)) {
1792 		bgp_fsm(p, EVNT_CON_FATAL);
1793 		return (1);
1794 	}
1795 
1796 	if (pfd->revents & POLLOUT && p->wbuf.queued) {
1797 		if ((error = msgbuf_write(&p->wbuf)) <= 0 && errno != EAGAIN) {
1798 			if (error == 0)
1799 				log_peer_warnx(&p->conf, "Connection closed");
1800 			else if (error == -1)
1801 				log_peer_warn(&p->conf, "write error");
1802 			bgp_fsm(p, EVNT_CON_FATAL);
1803 			return (1);
1804 		}
1805 		if (!(pfd->revents & POLLIN))
1806 			return (1);
1807 	}
1808 
1809 	if (p->rbuf && pfd->revents & POLLIN) {
1810 		if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos,
1811 		    sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) {
1812 			if (errno != EINTR && errno != EAGAIN) {
1813 				log_peer_warn(&p->conf, "read error");
1814 				bgp_fsm(p, EVNT_CON_FATAL);
1815 			}
1816 			return (1);
1817 		}
1818 		if (n == 0) {	/* connection closed */
1819 			bgp_fsm(p, EVNT_CON_CLOSED);
1820 			return (1);
1821 		}
1822 
1823 		p->rbuf->wpos += n;
1824 		p->stats.last_read = time(NULL);
1825 		return (1);
1826 	}
1827 	return (0);
1828 }
1829 
1830 int
1831 session_process_msg(struct peer *p)
1832 {
1833 	ssize_t		rpos, av, left;
1834 	int		processed = 0;
1835 	u_int16_t	msglen;
1836 	u_int8_t	msgtype;
1837 
1838 	rpos = 0;
1839 	av = p->rbuf->wpos;
1840 
1841 	/*
1842 	 * session might drop to IDLE -> buffers deallocated
1843 	 * we MUST check rbuf != NULL before use
1844 	 */
1845 	for (;;) {
1846 		if (rpos + MSGSIZE_HEADER > av)
1847 			break;
1848 		if (p->rbuf == NULL)
1849 			break;
1850 		if (parse_header(p, p->rbuf->buf + rpos, &msglen,
1851 		    &msgtype) == -1)
1852 			return (0);
1853 		if (rpos + msglen > av)
1854 			break;
1855 		p->rbuf->rptr = p->rbuf->buf + rpos;
1856 
1857 		switch (msgtype) {
1858 		case OPEN:
1859 			bgp_fsm(p, EVNT_RCVD_OPEN);
1860 			p->stats.msg_rcvd_open++;
1861 			break;
1862 		case UPDATE:
1863 			bgp_fsm(p, EVNT_RCVD_UPDATE);
1864 			p->stats.msg_rcvd_update++;
1865 			break;
1866 		case NOTIFICATION:
1867 			bgp_fsm(p, EVNT_RCVD_NOTIFICATION);
1868 			p->stats.msg_rcvd_notification++;
1869 			break;
1870 		case KEEPALIVE:
1871 			bgp_fsm(p, EVNT_RCVD_KEEPALIVE);
1872 			p->stats.msg_rcvd_keepalive++;
1873 			break;
1874 		case RREFRESH:
1875 			parse_refresh(p);
1876 			p->stats.msg_rcvd_rrefresh++;
1877 			break;
1878 		default:	/* cannot happen */
1879 			session_notification(p, ERR_HEADER, ERR_HDR_TYPE,
1880 			    &msgtype, 1);
1881 			log_warnx("received message with unknown type %u",
1882 			    msgtype);
1883 			bgp_fsm(p, EVNT_CON_FATAL);
1884 		}
1885 		rpos += msglen;
1886 		if (++processed > MSG_PROCESS_LIMIT)
1887 			break;
1888 	}
1889 	if (p->rbuf == NULL)
1890 		return (1);
1891 
1892 	if (rpos < av) {
1893 		left = av - rpos;
1894 		memcpy(&p->rbuf->buf, p->rbuf->buf + rpos, left);
1895 		p->rbuf->wpos = left;
1896 	} else
1897 		p->rbuf->wpos = 0;
1898 
1899 	return (1);
1900 }
1901 
1902 int
1903 parse_header(struct peer *peer, u_char *data, u_int16_t *len, u_int8_t *type)
1904 {
1905 	struct mrt		*mrt;
1906 	u_char			*p;
1907 	u_int16_t		 olen;
1908 	static const u_int8_t	 marker[MSGSIZE_HEADER_MARKER] = { 0xff, 0xff,
1909 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1910 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
1911 
1912 	/* caller MUST make sure we are getting 19 bytes! */
1913 	p = data;
1914 	if (memcmp(p, marker, sizeof(marker))) {
1915 		log_peer_warnx(&peer->conf, "sync error");
1916 		session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL, 0);
1917 		bgp_fsm(peer, EVNT_CON_FATAL);
1918 		return (-1);
1919 	}
1920 	p += MSGSIZE_HEADER_MARKER;
1921 
1922 	memcpy(&olen, p, 2);
1923 	*len = ntohs(olen);
1924 	p += 2;
1925 	memcpy(type, p, 1);
1926 
1927 	if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) {
1928 		log_peer_warnx(&peer->conf,
1929 		    "received message: illegal length: %u byte", *len);
1930 		session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1931 		    &olen, sizeof(olen));
1932 		bgp_fsm(peer, EVNT_CON_FATAL);
1933 		return (-1);
1934 	}
1935 
1936 	switch (*type) {
1937 	case OPEN:
1938 		if (*len < MSGSIZE_OPEN_MIN) {
1939 			log_peer_warnx(&peer->conf,
1940 			    "received OPEN: illegal len: %u byte", *len);
1941 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1942 			    &olen, sizeof(olen));
1943 			bgp_fsm(peer, EVNT_CON_FATAL);
1944 			return (-1);
1945 		}
1946 		break;
1947 	case NOTIFICATION:
1948 		if (*len < MSGSIZE_NOTIFICATION_MIN) {
1949 			log_peer_warnx(&peer->conf,
1950 			    "received NOTIFICATION: illegal len: %u byte",
1951 			    *len);
1952 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1953 			    &olen, sizeof(olen));
1954 			bgp_fsm(peer, EVNT_CON_FATAL);
1955 			return (-1);
1956 		}
1957 		break;
1958 	case UPDATE:
1959 		if (*len < MSGSIZE_UPDATE_MIN) {
1960 			log_peer_warnx(&peer->conf,
1961 			    "received UPDATE: illegal len: %u byte", *len);
1962 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1963 			    &olen, sizeof(olen));
1964 			bgp_fsm(peer, EVNT_CON_FATAL);
1965 			return (-1);
1966 		}
1967 		break;
1968 	case KEEPALIVE:
1969 		if (*len != MSGSIZE_KEEPALIVE) {
1970 			log_peer_warnx(&peer->conf,
1971 			    "received KEEPALIVE: illegal len: %u byte", *len);
1972 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1973 			    &olen, sizeof(olen));
1974 			bgp_fsm(peer, EVNT_CON_FATAL);
1975 			return (-1);
1976 		}
1977 		break;
1978 	case RREFRESH:
1979 		if (*len != MSGSIZE_RREFRESH) {
1980 			log_peer_warnx(&peer->conf,
1981 			    "received RREFRESH: illegal len: %u byte", *len);
1982 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1983 			    &olen, sizeof(olen));
1984 			bgp_fsm(peer, EVNT_CON_FATAL);
1985 			return (-1);
1986 		}
1987 		break;
1988 	default:
1989 		log_peer_warnx(&peer->conf,
1990 		    "received msg with unknown type %u", *type);
1991 		session_notification(peer, ERR_HEADER, ERR_HDR_TYPE,
1992 		    type, 1);
1993 		bgp_fsm(peer, EVNT_CON_FATAL);
1994 		return (-1);
1995 	}
1996 	LIST_FOREACH(mrt, &mrthead, entry) {
1997 		if (!(mrt->type == MRT_ALL_IN || (*type == UPDATE &&
1998 		    mrt->type == MRT_UPDATE_IN)))
1999 			continue;
2000 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
2001 		    mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
2002 		    mrt->group_id == peer->conf.groupid))
2003 			mrt_dump_bgp_msg(mrt, data, *len, peer);
2004 	}
2005 	return (0);
2006 }
2007 
2008 int
2009 parse_open(struct peer *peer)
2010 {
2011 	u_char		*p, *op_val;
2012 	u_int8_t	 version, rversion;
2013 	u_int16_t	 short_as, msglen;
2014 	u_int16_t	 holdtime, oholdtime, myholdtime;
2015 	u_int32_t	 as, bgpid;
2016 	u_int8_t	 optparamlen, plen;
2017 	u_int8_t	 op_type, op_len;
2018 
2019 	p = peer->rbuf->rptr;
2020 	p += MSGSIZE_HEADER_MARKER;
2021 	memcpy(&msglen, p, sizeof(msglen));
2022 	msglen = ntohs(msglen);
2023 
2024 	p = peer->rbuf->rptr;
2025 	p += MSGSIZE_HEADER;	/* header is already checked */
2026 
2027 	memcpy(&version, p, sizeof(version));
2028 	p += sizeof(version);
2029 
2030 	if (version != BGP_VERSION) {
2031 		log_peer_warnx(&peer->conf,
2032 		    "peer wants unrecognized version %u", version);
2033 		if (version > BGP_VERSION)
2034 			rversion = version - BGP_VERSION;
2035 		else
2036 			rversion = BGP_VERSION;
2037 		session_notification(peer, ERR_OPEN, ERR_OPEN_VERSION,
2038 		    &rversion, sizeof(rversion));
2039 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2040 		return (-1);
2041 	}
2042 
2043 	memcpy(&short_as, p, sizeof(short_as));
2044 	p += sizeof(short_as);
2045 	as = peer->short_as = ntohs(short_as);
2046 
2047 	memcpy(&oholdtime, p, sizeof(oholdtime));
2048 	p += sizeof(oholdtime);
2049 
2050 	holdtime = ntohs(oholdtime);
2051 	if (holdtime && holdtime < peer->conf.min_holdtime) {
2052 		log_peer_warnx(&peer->conf,
2053 		    "peer requests unacceptable holdtime %u", holdtime);
2054 		session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME,
2055 		    NULL, 0);
2056 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2057 		return (-1);
2058 	}
2059 
2060 	myholdtime = peer->conf.holdtime;
2061 	if (!myholdtime)
2062 		myholdtime = conf->holdtime;
2063 	if (holdtime < myholdtime)
2064 		peer->holdtime = holdtime;
2065 	else
2066 		peer->holdtime = myholdtime;
2067 
2068 	memcpy(&bgpid, p, sizeof(bgpid));
2069 	p += sizeof(bgpid);
2070 
2071 	/* check bgpid for validity - just disallow 0 */
2072 	if (ntohl(bgpid) == 0) {
2073 		log_peer_warnx(&peer->conf, "peer BGPID %lu unacceptable",
2074 		    ntohl(bgpid));
2075 		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID,
2076 		    NULL, 0);
2077 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2078 		return (-1);
2079 	}
2080 	peer->remote_bgpid = bgpid;
2081 
2082 	memcpy(&optparamlen, p, sizeof(optparamlen));
2083 	p += sizeof(optparamlen);
2084 
2085 	if (optparamlen != msglen - MSGSIZE_OPEN_MIN) {
2086 			log_peer_warnx(&peer->conf,
2087 			    "corrupt OPEN message received: length mismatch");
2088 			session_notification(peer, ERR_OPEN, 0, NULL, 0);
2089 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2090 			return (-1);
2091 	}
2092 
2093 	plen = optparamlen;
2094 	while (plen > 0) {
2095 		if (plen < 2) {
2096 			log_peer_warnx(&peer->conf,
2097 			    "corrupt OPEN message received, len wrong");
2098 			session_notification(peer, ERR_OPEN, 0, NULL, 0);
2099 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2100 			return (-1);
2101 		}
2102 		memcpy(&op_type, p, sizeof(op_type));
2103 		p += sizeof(op_type);
2104 		plen -= sizeof(op_type);
2105 		memcpy(&op_len, p, sizeof(op_len));
2106 		p += sizeof(op_len);
2107 		plen -= sizeof(op_len);
2108 		if (op_len > 0) {
2109 			if (plen < op_len) {
2110 				log_peer_warnx(&peer->conf,
2111 				    "corrupt OPEN message received, len wrong");
2112 				session_notification(peer, ERR_OPEN, 0,
2113 				    NULL, 0);
2114 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2115 				return (-1);
2116 			}
2117 			op_val = p;
2118 			p += op_len;
2119 			plen -= op_len;
2120 		} else
2121 			op_val = NULL;
2122 
2123 		switch (op_type) {
2124 		case OPT_PARAM_CAPABILITIES:		/* RFC 3392 */
2125 			if (parse_capabilities(peer, op_val, op_len,
2126 			    &as) == -1) {
2127 				session_notification(peer, ERR_OPEN, 0,
2128 				    NULL, 0);
2129 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2130 				return (-1);
2131 			}
2132 			break;
2133 		case OPT_PARAM_AUTH:			/* deprecated */
2134 		default:
2135 			/*
2136 			 * unsupported type
2137 			 * the RFCs tell us to leave the data section empty
2138 			 * and notify the peer with ERR_OPEN, ERR_OPEN_OPT.
2139 			 * How the peer should know _which_ optional parameter
2140 			 * we don't support is beyond me.
2141 			 */
2142 			log_peer_warnx(&peer->conf,
2143 			    "received OPEN message with unsupported optional "
2144 			    "parameter: type %u", op_type);
2145 			session_notification(peer, ERR_OPEN, ERR_OPEN_OPT,
2146 				NULL, 0);
2147 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2148 			timer_set(peer, Timer_IdleHold, 0);	/* no punish */
2149 			peer->IdleHoldTime /= 2;
2150 			return (-1);
2151 		}
2152 	}
2153 
2154 	/* if remote-as is zero and it's a cloned neighbor, accept any */
2155 	if (peer->template && !peer->conf.remote_as && as != AS_TRANS) {
2156 		peer->conf.remote_as = as;
2157 		peer->conf.ebgp = (peer->conf.remote_as != conf->as);
2158 		if (!peer->conf.ebgp)
2159 			/* force enforce_as off for iBGP sessions */
2160 			peer->conf.enforce_as = ENFORCE_AS_OFF;
2161 	}
2162 
2163 	if (peer->conf.remote_as != as) {
2164 		log_peer_warnx(&peer->conf, "peer sent wrong AS %s",
2165 		    log_as(as));
2166 		session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL, 0);
2167 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2168 		return (-1);
2169 	}
2170 
2171 	if (capa_neg_calc(peer) == -1) {
2172 		log_peer_warnx(&peer->conf,
2173 		    "capability negotiation calculation failed");
2174 		session_notification(peer, ERR_OPEN, 0, NULL, 0);
2175 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2176 		return (-1);
2177 	}
2178 
2179 	return (0);
2180 }
2181 
2182 int
2183 parse_update(struct peer *peer)
2184 {
2185 	u_char		*p;
2186 	u_int16_t	 datalen;
2187 
2188 	/*
2189 	 * we pass the message verbatim to the rde.
2190 	 * in case of errors the whole session is reset with a
2191 	 * notification anyway, we only need to know the peer
2192 	 */
2193 	p = peer->rbuf->rptr;
2194 	p += MSGSIZE_HEADER_MARKER;
2195 	memcpy(&datalen, p, sizeof(datalen));
2196 	datalen = ntohs(datalen);
2197 
2198 	p = peer->rbuf->rptr;
2199 	p += MSGSIZE_HEADER;	/* header is already checked */
2200 	datalen -= MSGSIZE_HEADER;
2201 
2202 	if (imsg_compose(ibuf_rde, IMSG_UPDATE, peer->conf.id, 0, -1, p,
2203 	    datalen) == -1)
2204 		return (-1);
2205 
2206 	return (0);
2207 }
2208 
2209 int
2210 parse_refresh(struct peer *peer)
2211 {
2212 	u_char		*p;
2213 	u_int16_t	 afi;
2214 	u_int8_t	 aid, safi;
2215 
2216 	p = peer->rbuf->rptr;
2217 	p += MSGSIZE_HEADER;	/* header is already checked */
2218 
2219 	/*
2220 	 * We could check if we actually announced the capability but
2221 	 * as long as the message is correctly encoded we don't care.
2222 	 */
2223 
2224 	/* afi, 2 byte */
2225 	memcpy(&afi, p, sizeof(afi));
2226 	afi = ntohs(afi);
2227 	p += 2;
2228 	/* reserved, 1 byte */
2229 	p += 1;
2230 	/* safi, 1 byte */
2231 	memcpy(&safi, p, sizeof(safi));
2232 
2233 	/* afi/safi unchecked -	unrecognized values will be ignored anyway */
2234 	if (afi2aid(afi, safi, &aid) == -1) {
2235 		log_peer_warnx(&peer->conf, "peer sent bad refresh, "
2236 		    "invalid afi/safi pair");
2237 		return (0);
2238 	}
2239 
2240 	if (imsg_compose(ibuf_rde, IMSG_REFRESH, peer->conf.id, 0, -1, &aid,
2241 	    sizeof(aid)) == -1)
2242 		return (-1);
2243 
2244 	return (0);
2245 }
2246 
2247 int
2248 parse_notification(struct peer *peer)
2249 {
2250 	u_char		*p;
2251 	u_int16_t	 datalen;
2252 	u_int8_t	 errcode;
2253 	u_int8_t	 subcode;
2254 	u_int8_t	 capa_code;
2255 	u_int8_t	 capa_len;
2256 	u_int8_t	 i;
2257 
2258 	/* just log */
2259 	p = peer->rbuf->rptr;
2260 	p += MSGSIZE_HEADER_MARKER;
2261 	memcpy(&datalen, p, sizeof(datalen));
2262 	datalen = ntohs(datalen);
2263 
2264 	p = peer->rbuf->rptr;
2265 	p += MSGSIZE_HEADER;	/* header is already checked */
2266 	datalen -= MSGSIZE_HEADER;
2267 
2268 	memcpy(&errcode, p, sizeof(errcode));
2269 	p += sizeof(errcode);
2270 	datalen -= sizeof(errcode);
2271 
2272 	memcpy(&subcode, p, sizeof(subcode));
2273 	p += sizeof(subcode);
2274 	datalen -= sizeof(subcode);
2275 
2276 	log_notification(peer, errcode, subcode, p, datalen, "received");
2277 	peer->errcnt++;
2278 
2279 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_CAPA) {
2280 		if (datalen == 0) {	/* zebra likes to send those.. humbug */
2281 			log_peer_warnx(&peer->conf, "received \"unsupported "
2282 			    "capability\" notification without data part, "
2283 			    "disabling capability announcements altogether");
2284 			session_capa_ann_none(peer);
2285 		}
2286 
2287 		while (datalen > 0) {
2288 			if (datalen < 2) {
2289 				log_peer_warnx(&peer->conf,
2290 				    "parse_notification: "
2291 				    "expect len >= 2, len is %u", datalen);
2292 				return (-1);
2293 			}
2294 			memcpy(&capa_code, p, sizeof(capa_code));
2295 			p += sizeof(capa_code);
2296 			datalen -= sizeof(capa_code);
2297 			memcpy(&capa_len, p, sizeof(capa_len));
2298 			p += sizeof(capa_len);
2299 			datalen -= sizeof(capa_len);
2300 			if (datalen < capa_len) {
2301 				log_peer_warnx(&peer->conf,
2302 				    "parse_notification: capa_len %u exceeds "
2303 				    "remaining msg length %u", capa_len,
2304 				    datalen);
2305 				return (-1);
2306 			}
2307 			p += capa_len;
2308 			datalen -= capa_len;
2309 			switch (capa_code) {
2310 			case CAPA_MP:
2311 				for (i = 0; i < AID_MAX; i++)
2312 					peer->capa.ann.mp[i] = 0;
2313 				log_peer_warnx(&peer->conf,
2314 				    "disabling multiprotocol capability");
2315 				break;
2316 			case CAPA_REFRESH:
2317 				peer->capa.ann.refresh = 0;
2318 				log_peer_warnx(&peer->conf,
2319 				    "disabling route refresh capability");
2320 				break;
2321 			case CAPA_RESTART:
2322 				peer->capa.ann.grestart.restart = 0;
2323 				log_peer_warnx(&peer->conf,
2324 				    "disabling restart capability");
2325 				break;
2326 			case CAPA_AS4BYTE:
2327 				peer->capa.ann.as4byte = 0;
2328 				log_peer_warnx(&peer->conf,
2329 				    "disabling 4-byte AS num capability");
2330 				break;
2331 			default:	/* should not happen... */
2332 				log_peer_warnx(&peer->conf, "received "
2333 				    "\"unsupported capability\" notification "
2334 				    "for unknown capability %u, disabling "
2335 				    "capability announcements altogether",
2336 				    capa_code);
2337 				session_capa_ann_none(peer);
2338 				break;
2339 			}
2340 		}
2341 
2342 		return (1);
2343 	}
2344 
2345 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_OPT) {
2346 		session_capa_ann_none(peer);
2347 		return (1);
2348 	}
2349 
2350 	return (0);
2351 }
2352 
2353 int
2354 parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as)
2355 {
2356 	u_char		*capa_val;
2357 	u_int32_t	 remote_as;
2358 	u_int16_t	 len;
2359 	u_int16_t	 afi;
2360 	u_int16_t	 gr_header;
2361 	u_int8_t	 safi;
2362 	u_int8_t	 aid;
2363 	u_int8_t	 gr_flags;
2364 	u_int8_t	 capa_code;
2365 	u_int8_t	 capa_len;
2366 	u_int8_t	 i;
2367 
2368 	len = dlen;
2369 	while (len > 0) {
2370 		if (len < 2) {
2371 			log_peer_warnx(&peer->conf, "Bad capabilities attr "
2372 			    "length: %u, too short", len);
2373 			return (-1);
2374 		}
2375 		memcpy(&capa_code, d, sizeof(capa_code));
2376 		d += sizeof(capa_code);
2377 		len -= sizeof(capa_code);
2378 		memcpy(&capa_len, d, sizeof(capa_len));
2379 		d += sizeof(capa_len);
2380 		len -= sizeof(capa_len);
2381 		if (capa_len > 0) {
2382 			if (len < capa_len) {
2383 				log_peer_warnx(&peer->conf,
2384 				    "Bad capabilities attr length: "
2385 				    "len %u smaller than capa_len %u",
2386 				    len, capa_len);
2387 				return (-1);
2388 			}
2389 			capa_val = d;
2390 			d += capa_len;
2391 			len -= capa_len;
2392 		} else
2393 			capa_val = NULL;
2394 
2395 		switch (capa_code) {
2396 		case CAPA_MP:			/* RFC 4760 */
2397 			if (capa_len != 4) {
2398 				log_peer_warnx(&peer->conf,
2399 				    "Bad multi protocol capability length: "
2400 				    "%u", capa_len);
2401 				break;
2402 			}
2403 			memcpy(&afi, capa_val, sizeof(afi));
2404 			afi = ntohs(afi);
2405 			memcpy(&safi, capa_val + 3, sizeof(safi));
2406 			if (afi2aid(afi, safi, &aid) == -1) {
2407 				log_peer_warnx(&peer->conf,
2408 				    "Received multi protocol capability: "
2409 				    " unknown AFI %u, safi %u pair",
2410 				    afi, safi);
2411 				break;
2412 			}
2413 			peer->capa.peer.mp[aid] = 1;
2414 			break;
2415 		case CAPA_REFRESH:
2416 			peer->capa.peer.refresh = 1;
2417 			break;
2418 		case CAPA_RESTART:
2419 			if (capa_len == 2) {
2420 				/* peer only supports EoR marker */
2421 				peer->capa.peer.grestart.restart = 1;
2422 				peer->capa.peer.grestart.timeout = 0;
2423 				break;
2424 			} else if (capa_len % 4 != 2) {
2425 				log_peer_warnx(&peer->conf,
2426 				    "Bad graceful restart capability length: "
2427 				    "%u", capa_len);
2428 				peer->capa.peer.grestart.restart = 0;
2429 				peer->capa.peer.grestart.timeout = 0;
2430 				break;
2431 			}
2432 
2433 			memcpy(&gr_header, capa_val, sizeof(gr_header));
2434 			gr_header = ntohs(gr_header);
2435 			peer->capa.peer.grestart.timeout =
2436 			    gr_header & CAPA_GR_TIMEMASK;
2437 			if (peer->capa.peer.grestart.timeout == 0) {
2438 				log_peer_warnx(&peer->conf, "Received "
2439 				    "graceful restart timeout is zero");
2440 				peer->capa.peer.grestart.restart = 0;
2441 				break;
2442 			}
2443 
2444 			for (i = 2; i <= capa_len - 4; i += 4) {
2445 				memcpy(&afi, capa_val + i, sizeof(afi));
2446 				afi = ntohs(afi);
2447 				memcpy(&safi, capa_val + i + 2, sizeof(safi));
2448 				if (afi2aid(afi, safi, &aid) == -1) {
2449 					log_peer_warnx(&peer->conf,
2450 					    "Received graceful restart capa: "
2451 					    " unknown AFI %u, safi %u pair",
2452 					    afi, safi);
2453 					continue;
2454 				}
2455 				memcpy(&gr_flags, capa_val + i + 3,
2456 				    sizeof(gr_flags));
2457 				peer->capa.peer.grestart.flags[aid] |=
2458 				    CAPA_GR_PRESENT;
2459 				if (gr_flags & CAPA_GR_F_FLAG)
2460 					peer->capa.peer.grestart.flags[aid] |=
2461 					    CAPA_GR_FORWARD;
2462 				if (gr_header & CAPA_GR_R_FLAG)
2463 					peer->capa.peer.grestart.flags[aid] |=
2464 					    CAPA_GR_RESTART;
2465 				peer->capa.peer.grestart.restart = 2;
2466 			}
2467 			break;
2468 		case CAPA_AS4BYTE:
2469 			if (capa_len != 4) {
2470 				log_peer_warnx(&peer->conf,
2471 				    "Bad AS4BYTE capability length: "
2472 				    "%u", capa_len);
2473 				peer->capa.peer.as4byte = 0;
2474 				break;
2475 			}
2476 			memcpy(&remote_as, capa_val, sizeof(remote_as));
2477 			*as = ntohl(remote_as);
2478 			peer->capa.peer.as4byte = 1;
2479 			break;
2480 		default:
2481 			break;
2482 		}
2483 	}
2484 
2485 	return (0);
2486 }
2487 
2488 int
2489 capa_neg_calc(struct peer *p)
2490 {
2491 	u_int8_t	i, hasmp = 0;
2492 
2493 	/* refresh: does not realy matter here, use peer setting */
2494 	p->capa.neg.refresh = p->capa.peer.refresh;
2495 
2496 	/* as4byte: both side must announce capability */
2497 	if (p->capa.ann.as4byte && p->capa.peer.as4byte)
2498 		p->capa.neg.as4byte = 1;
2499 	else
2500 		p->capa.neg.as4byte = 0;
2501 
2502 	/* MP: both side must announce capability */
2503 	for (i = 0; i < AID_MAX; i++) {
2504 		if (p->capa.ann.mp[i] && p->capa.peer.mp[i]) {
2505 			p->capa.neg.mp[i] = 1;
2506 			hasmp = 1;
2507 		} else
2508 			p->capa.neg.mp[i] = 0;
2509 	}
2510 	/* if no MP capability present default to IPv4 unicast mode */
2511 	if (!hasmp)
2512 		p->capa.neg.mp[AID_INET] = 1;
2513 
2514 	/*
2515 	 * graceful restart: only the peer capabilities are of interest here.
2516 	 * It is necessary to compare the new values with the previous ones
2517 	 * and act acordingly. AFI/SAFI that are not part in the MP capability
2518 	 * are treated as not being present.
2519 	 */
2520 
2521 	for (i = 0; i < AID_MAX; i++) {
2522 		int8_t	negflags;
2523 
2524 		/* disable GR if the AFI/SAFI is not present */
2525 		if (p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT &&
2526 		    p->capa.neg.mp[i] == 0)
2527 			p->capa.peer.grestart.flags[i] = 0;	/* disable */
2528 		/* look at current GR state and decide what to do */
2529 		negflags = p->capa.neg.grestart.flags[i];
2530 		p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i];
2531 		if (negflags & CAPA_GR_RESTARTING) {
2532 			if (!(p->capa.peer.grestart.flags[i] &
2533 			    CAPA_GR_FORWARD)) {
2534 				if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH,
2535 				    p->conf.id, 0, -1, &i, sizeof(i)) == -1)
2536 					return (-1);
2537 				log_peer_warnx(&p->conf, "graceful restart of "
2538 				    "%s, not restarted, flushing", aid2str(i));
2539 			} else
2540 				p->capa.neg.grestart.flags[i] |=
2541 				    CAPA_GR_RESTARTING;
2542 		}
2543 	}
2544 	p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout;
2545 	p->capa.neg.grestart.restart = p->capa.peer.grestart.restart;
2546 
2547 	return (0);
2548 }
2549 
2550 void
2551 session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt)
2552 {
2553 	struct imsg		 imsg;
2554 	struct mrt		 xmrt;
2555 	struct mrt		*mrt;
2556 	struct peer_config	*pconf;
2557 	struct peer		*p, *next;
2558 	struct listen_addr	*la, *nla;
2559 	struct kif		*kif;
2560 	u_char			*data;
2561 	enum reconf_action	 reconf;
2562 	int			 n, depend_ok, restricted;
2563 	u_int8_t		 aid, errcode, subcode;
2564 
2565 	if ((n = imsg_read(ibuf)) == -1)
2566 		fatal("session_dispatch_imsg: imsg_read error");
2567 
2568 	if (n == 0)	/* connection closed */
2569 		fatalx("session_dispatch_imsg: pipe closed");
2570 
2571 	for (;;) {
2572 		if ((n = imsg_get(ibuf, &imsg)) == -1)
2573 			fatal("session_dispatch_imsg: imsg_get error");
2574 
2575 		if (n == 0)
2576 			break;
2577 
2578 		switch (imsg.hdr.type) {
2579 		case IMSG_RECONF_CONF:
2580 			if (idx != PFD_PIPE_MAIN)
2581 				fatalx("reconf request not from parent");
2582 			if ((nconf = malloc(sizeof(struct bgpd_config))) ==
2583 			    NULL)
2584 				fatal(NULL);
2585 			memcpy(nconf, imsg.data, sizeof(struct bgpd_config));
2586 			if ((nconf->listen_addrs = calloc(1,
2587 			    sizeof(struct listen_addrs))) == NULL)
2588 				fatal(NULL);
2589 			TAILQ_INIT(nconf->listen_addrs);
2590 			npeers = NULL;
2591 			init_conf(nconf);
2592 			pending_reconf = 1;
2593 			break;
2594 		case IMSG_RECONF_PEER:
2595 			if (idx != PFD_PIPE_MAIN)
2596 				fatalx("reconf request not from parent");
2597 			pconf = imsg.data;
2598 			p = getpeerbyaddr(&pconf->remote_addr);
2599 			if (p == NULL) {
2600 				if ((p = calloc(1, sizeof(struct peer))) ==
2601 				    NULL)
2602 					fatal("new_peer");
2603 				p->state = p->prev_state = STATE_NONE;
2604 				p->next = npeers;
2605 				npeers = p;
2606 				reconf = RECONF_REINIT;
2607 			} else
2608 				reconf = RECONF_KEEP;
2609 
2610 			memcpy(&p->conf, pconf, sizeof(struct peer_config));
2611 			p->conf.reconf_action = reconf;
2612 
2613 			/* sync the RDE in case we keep the peer */
2614 			if (reconf == RECONF_KEEP) {
2615 				if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD,
2616 				    p->conf.id, 0, -1, &p->conf,
2617 				    sizeof(struct peer_config)) == -1)
2618 					fatalx("imsg_compose error");
2619 				if (p->conf.template) {
2620 					/* apply the conf to all clones */
2621 					struct peer *np;
2622 					for (np = peers; np; np = np->next) {
2623 						if (np->template != p)
2624 							continue;
2625 						session_template_clone(np,
2626 						    NULL, np->conf.id,
2627 						    np->conf.remote_as);
2628 						if (imsg_compose(ibuf_rde,
2629 						    IMSG_SESSION_ADD,
2630 						    np->conf.id, 0, -1,
2631 						    &np->conf,
2632 						    sizeof(struct peer_config))
2633 						    == -1)
2634 							fatalx("imsg_compose error");
2635 					}
2636 				}
2637 			}
2638 			break;
2639 		case IMSG_RECONF_LISTENER:
2640 			if (idx != PFD_PIPE_MAIN)
2641 				fatalx("reconf request not from parent");
2642 			if (nconf == NULL)
2643 				fatalx("IMSG_RECONF_LISTENER but no config");
2644 			nla = imsg.data;
2645 			TAILQ_FOREACH(la, conf->listen_addrs, entry)
2646 				if (!la_cmp(la, nla))
2647 					break;
2648 
2649 			if (la == NULL) {
2650 				if (nla->reconf != RECONF_REINIT)
2651 					fatalx("king bula sez: "
2652 					    "expected REINIT");
2653 
2654 				if ((nla->fd = imsg.fd) == -1)
2655 					log_warnx("expected to receive fd for "
2656 					    "%s but didn't receive any",
2657 					    log_sockaddr((struct sockaddr *)
2658 					    &nla->sa));
2659 
2660 				la = calloc(1, sizeof(struct listen_addr));
2661 				if (la == NULL)
2662 					fatal(NULL);
2663 				memcpy(&la->sa, &nla->sa, sizeof(la->sa));
2664 				la->flags = nla->flags;
2665 				la->fd = nla->fd;
2666 				la->reconf = RECONF_REINIT;
2667 				TAILQ_INSERT_TAIL(nconf->listen_addrs, la,
2668 				    entry);
2669 			} else {
2670 				if (nla->reconf != RECONF_KEEP)
2671 					fatalx("king bula sez: expected KEEP");
2672 				la->reconf = RECONF_KEEP;
2673 			}
2674 
2675 			break;
2676 		case IMSG_RECONF_CTRL:
2677 			if (idx != PFD_PIPE_MAIN)
2678 				fatalx("reconf request not from parent");
2679 			if (imsg.hdr.len != IMSG_HEADER_SIZE +
2680 			    sizeof(restricted))
2681 				fatalx("IFINFO imsg with wrong len");
2682 			memcpy(&restricted, imsg.data, sizeof(restricted));
2683 			if (imsg.fd == -1) {
2684 				log_warnx("expected to receive fd for control "
2685 				    "socket but didn't receive any");
2686 				break;
2687 			}
2688 			if (restricted) {
2689 				control_shutdown(rcsock);
2690 				rcsock = imsg.fd;
2691 				control_listen(rcsock);
2692 			} else {
2693 				control_shutdown(csock);
2694 				csock = imsg.fd;
2695 				control_listen(csock);
2696 			}
2697 			break;
2698 		case IMSG_RECONF_DONE:
2699 			if (idx != PFD_PIPE_MAIN)
2700 				fatalx("reconf request not from parent");
2701 			if (nconf == NULL)
2702 				fatalx("got IMSG_RECONF_DONE but no config");
2703 			conf->flags = nconf->flags;
2704 			conf->log = nconf->log;
2705 			conf->bgpid = nconf->bgpid;
2706 			conf->clusterid = nconf->clusterid;
2707 			conf->as = nconf->as;
2708 			conf->short_as = nconf->short_as;
2709 			conf->holdtime = nconf->holdtime;
2710 			conf->min_holdtime = nconf->min_holdtime;
2711 			conf->connectretry = nconf->connectretry;
2712 
2713 			/* add new peers */
2714 			for (p = npeers; p != NULL; p = next) {
2715 				next = p->next;
2716 				p->next = peers;
2717 				peers = p;
2718 			}
2719 			/* find ones that need attention */
2720 			for (p = peers; p != NULL; p = p->next) {
2721 				/* needs to be deleted? */
2722 				if (p->conf.reconf_action == RECONF_NONE &&
2723 				    !p->template)
2724 					p->conf.reconf_action = RECONF_DELETE;
2725 				/* had demotion, is demoted, demote removed? */
2726 				if (p->demoted && !p->conf.demote_group[0])
2727 						session_demote(p, -1);
2728 			}
2729 
2730 			/* delete old listeners */
2731 			for (la = TAILQ_FIRST(conf->listen_addrs); la != NULL;
2732 			    la = nla) {
2733 				nla = TAILQ_NEXT(la, entry);
2734 				if (la->reconf == RECONF_NONE) {
2735 					log_info("not listening on %s any more",
2736 					    log_sockaddr(
2737 					    (struct sockaddr *)&la->sa));
2738 					TAILQ_REMOVE(conf->listen_addrs, la,
2739 					    entry);
2740 					close(la->fd);
2741 					free(la);
2742 				}
2743 			}
2744 
2745 			/* add new listeners */
2746 			while ((la = TAILQ_FIRST(nconf->listen_addrs)) !=
2747 			    NULL) {
2748 				TAILQ_REMOVE(nconf->listen_addrs, la, entry);
2749 				TAILQ_INSERT_TAIL(conf->listen_addrs, la,
2750 				    entry);
2751 			}
2752 
2753 			setup_listeners(listener_cnt);
2754 			free(nconf->listen_addrs);
2755 			free(nconf);
2756 			nconf = NULL;
2757 			pending_reconf = 0;
2758 			log_info("SE reconfigured");
2759 			imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0,
2760 			    -1, NULL, 0);
2761 			break;
2762 		case IMSG_IFINFO:
2763 			if (idx != PFD_PIPE_MAIN)
2764 				fatalx("IFINFO message not from parent");
2765 			if (imsg.hdr.len != IMSG_HEADER_SIZE +
2766 			    sizeof(struct kif))
2767 				fatalx("IFINFO imsg with wrong len");
2768 			kif = imsg.data;
2769 			depend_ok = (kif->flags & IFF_UP) &&
2770 			    LINK_STATE_IS_UP(kif->link_state);
2771 
2772 			for (p = peers; p != NULL; p = p->next)
2773 				if (!strcmp(p->conf.if_depend, kif->ifname)) {
2774 					if (depend_ok && !p->depend_ok) {
2775 						p->depend_ok = depend_ok;
2776 						bgp_fsm(p, EVNT_START);
2777 					} else if (!depend_ok && p->depend_ok) {
2778 						p->depend_ok = depend_ok;
2779 						session_stop(p,
2780 						    ERR_CEASE_OTHER_CHANGE);
2781 					}
2782 				}
2783 			break;
2784 		case IMSG_MRT_OPEN:
2785 		case IMSG_MRT_REOPEN:
2786 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2787 			    sizeof(struct mrt)) {
2788 				log_warnx("wrong imsg len");
2789 				break;
2790 			}
2791 
2792 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
2793 			if ((xmrt.wbuf.fd = imsg.fd) == -1)
2794 				log_warnx("expected to receive fd for mrt dump "
2795 				    "but didn't receive any");
2796 
2797 			mrt = mrt_get(&mrthead, &xmrt);
2798 			if (mrt == NULL) {
2799 				/* new dump */
2800 				mrt = calloc(1, sizeof(struct mrt));
2801 				if (mrt == NULL)
2802 					fatal("session_dispatch_imsg");
2803 				memcpy(mrt, &xmrt, sizeof(struct mrt));
2804 				TAILQ_INIT(&mrt->wbuf.bufs);
2805 				LIST_INSERT_HEAD(&mrthead, mrt, entry);
2806 			} else {
2807 				/* old dump reopened */
2808 				close(mrt->wbuf.fd);
2809 				mrt->wbuf.fd = xmrt.wbuf.fd;
2810 			}
2811 			break;
2812 		case IMSG_MRT_CLOSE:
2813 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2814 			    sizeof(struct mrt)) {
2815 				log_warnx("wrong imsg len");
2816 				break;
2817 			}
2818 
2819 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
2820 			mrt = mrt_get(&mrthead, &xmrt);
2821 			if (mrt != NULL)
2822 				mrt_done(mrt);
2823 			break;
2824 		case IMSG_CTL_KROUTE:
2825 		case IMSG_CTL_KROUTE_ADDR:
2826 		case IMSG_CTL_SHOW_NEXTHOP:
2827 		case IMSG_CTL_SHOW_INTERFACE:
2828 		case IMSG_CTL_SHOW_FIB_TABLES:
2829 			if (idx != PFD_PIPE_MAIN)
2830 				fatalx("ctl kroute request not from parent");
2831 			control_imsg_relay(&imsg);
2832 			break;
2833 		case IMSG_CTL_SHOW_RIB:
2834 		case IMSG_CTL_SHOW_RIB_PREFIX:
2835 		case IMSG_CTL_SHOW_RIB_ATTR:
2836 		case IMSG_CTL_SHOW_RIB_MEM:
2837 		case IMSG_CTL_SHOW_NETWORK:
2838 		case IMSG_CTL_SHOW_NEIGHBOR:
2839 			if (idx != PFD_PIPE_ROUTE_CTL)
2840 				fatalx("ctl rib request not from RDE");
2841 			control_imsg_relay(&imsg);
2842 			break;
2843 		case IMSG_CTL_END:
2844 		case IMSG_CTL_RESULT:
2845 			control_imsg_relay(&imsg);
2846 			break;
2847 		case IMSG_UPDATE:
2848 			if (idx != PFD_PIPE_ROUTE)
2849 				fatalx("update request not from RDE");
2850 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2851 			    MAX_PKTSIZE - MSGSIZE_HEADER ||
2852 			    imsg.hdr.len < IMSG_HEADER_SIZE +
2853 			    MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER)
2854 				log_warnx("RDE sent invalid update");
2855 			else
2856 				session_update(imsg.hdr.peerid, imsg.data,
2857 				    imsg.hdr.len - IMSG_HEADER_SIZE);
2858 			break;
2859 		case IMSG_UPDATE_ERR:
2860 			if (idx != PFD_PIPE_ROUTE)
2861 				fatalx("update request not from RDE");
2862 			if (imsg.hdr.len < IMSG_HEADER_SIZE + 2) {
2863 				log_warnx("RDE sent invalid notification");
2864 				break;
2865 			}
2866 			if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) {
2867 				log_warnx("no such peer: id=%u",
2868 				    imsg.hdr.peerid);
2869 				break;
2870 			}
2871 			data = imsg.data;
2872 			errcode = *data++;
2873 			subcode = *data++;
2874 
2875 			if (imsg.hdr.len == IMSG_HEADER_SIZE + 2)
2876 				data = NULL;
2877 
2878 			session_notification(p, errcode, subcode,
2879 			    data, imsg.hdr.len - IMSG_HEADER_SIZE - 2);
2880 			switch (errcode) {
2881 			case ERR_CEASE:
2882 				switch (subcode) {
2883 				case ERR_CEASE_MAX_PREFIX:
2884 					bgp_fsm(p, EVNT_STOP);
2885 					if (p->conf.max_prefix_restart)
2886 						timer_set(p, Timer_IdleHold, 60 *
2887 						    p->conf.max_prefix_restart);
2888 					break;
2889 				default:
2890 					bgp_fsm(p, EVNT_CON_FATAL);
2891 					break;
2892 				}
2893 				break;
2894 			default:
2895 				bgp_fsm(p, EVNT_CON_FATAL);
2896 				break;
2897 			}
2898 			break;
2899 		case IMSG_SESSION_RESTARTED:
2900 			if (idx != PFD_PIPE_ROUTE)
2901 				fatalx("update request not from RDE");
2902 			if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(aid)) {
2903 				log_warnx("RDE sent invalid restart msg");
2904 				break;
2905 			}
2906 			if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) {
2907 				log_warnx("no such peer: id=%u",
2908 				    imsg.hdr.peerid);
2909 				break;
2910 			}
2911 			memcpy(&aid, imsg.data, sizeof(aid));
2912 			if (aid >= AID_MAX)
2913 				fatalx("IMSG_SESSION_RESTARTED: bad AID");
2914 			if (p->capa.neg.grestart.flags[aid] &
2915 			    CAPA_GR_RESTARTING) {
2916 				log_peer_warnx(&p->conf,
2917 				    "graceful restart of %s finished",
2918 				    aid2str(aid));
2919 				p->capa.neg.grestart.flags[aid] &=
2920 				    ~CAPA_GR_RESTARTING;
2921 				timer_stop(p, Timer_RestartTimeout);
2922 
2923 				/* signal back to RDE to cleanup stale routes */
2924 				if (imsg_compose(ibuf_rde,
2925 				    IMSG_SESSION_RESTARTED, imsg.hdr.peerid, 0,
2926 				    -1, &aid, sizeof(aid)) == -1)
2927 					fatal("imsg_compose: "
2928 					    "IMSG_SESSION_RESTARTED");
2929 			}
2930 			break;
2931 		default:
2932 			break;
2933 		}
2934 		imsg_free(&imsg);
2935 	}
2936 }
2937 
2938 int
2939 la_cmp(struct listen_addr *a, struct listen_addr *b)
2940 {
2941 	struct sockaddr_in	*in_a, *in_b;
2942 	struct sockaddr_in6	*in6_a, *in6_b;
2943 
2944 	if (a->sa.ss_family != b->sa.ss_family)
2945 		return (1);
2946 
2947 	switch (a->sa.ss_family) {
2948 	case AF_INET:
2949 		in_a = (struct sockaddr_in *)&a->sa;
2950 		in_b = (struct sockaddr_in *)&b->sa;
2951 		if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr)
2952 			return (1);
2953 		if (in_a->sin_port != in_b->sin_port)
2954 			return (1);
2955 		break;
2956 	case AF_INET6:
2957 		in6_a = (struct sockaddr_in6 *)&a->sa;
2958 		in6_b = (struct sockaddr_in6 *)&b->sa;
2959 		if (bcmp(&in6_a->sin6_addr, &in6_b->sin6_addr,
2960 		    sizeof(struct in6_addr)))
2961 			return (1);
2962 		if (in6_a->sin6_port != in6_b->sin6_port)
2963 			return (1);
2964 		break;
2965 	default:
2966 		fatal("king bula sez: unknown address family");
2967 		/* NOTREACHED */
2968 	}
2969 
2970 	return (0);
2971 }
2972 
2973 struct peer *
2974 getpeerbyaddr(struct bgpd_addr *addr)
2975 {
2976 	struct peer *p;
2977 
2978 	/* we might want a more effective way to find peers by IP */
2979 	for (p = peers; p != NULL &&
2980 	    memcmp(&p->conf.remote_addr, addr, sizeof(p->conf.remote_addr));
2981 	    p = p->next)
2982 		;	/* nothing */
2983 
2984 	return (p);
2985 }
2986 
2987 struct peer *
2988 getpeerbydesc(const char *descr)
2989 {
2990 	struct peer	*p, *res = NULL;
2991 	int		 match = 0;
2992 
2993 	for (p = peers; p != NULL; p = p->next)
2994 		if (!strcmp(p->conf.descr, descr)) {
2995 			res = p;
2996 			match++;
2997 		}
2998 
2999 	if (match > 1)
3000 		log_info("neighbor description \"%s\" not unique, request "
3001 		    "aborted", descr);
3002 
3003 	if (match == 1)
3004 		return (res);
3005 	else
3006 		return (NULL);
3007 }
3008 
3009 struct peer *
3010 getpeerbyip(struct sockaddr *ip)
3011 {
3012 	struct bgpd_addr addr;
3013 	struct peer	*p, *newpeer, *loose = NULL;
3014 	u_int32_t	 id;
3015 
3016 	sa2addr(ip, &addr);
3017 
3018 	/* we might want a more effective way to find peers by IP */
3019 	for (p = peers; p != NULL; p = p->next)
3020 		if (!p->conf.template &&
3021 		    !memcmp(&addr, &p->conf.remote_addr, sizeof(addr)))
3022 			return (p);
3023 
3024 	/* try template matching */
3025 	for (p = peers; p != NULL; p = p->next)
3026 		if (p->conf.template &&
3027 		    p->conf.remote_addr.aid == addr.aid &&
3028 		    session_match_mask(p, &addr))
3029 			if (loose == NULL || loose->conf.remote_masklen <
3030 			    p->conf.remote_masklen)
3031 				loose = p;
3032 
3033 	if (loose != NULL) {
3034 		/* clone */
3035 		if ((newpeer = malloc(sizeof(struct peer))) == NULL)
3036 			fatal(NULL);
3037 		memcpy(newpeer, loose, sizeof(struct peer));
3038 		for (id = UINT_MAX; id > UINT_MAX / 2; id--) {
3039 			for (p = peers; p != NULL && p->conf.id != id;
3040 			    p = p->next)
3041 				;	/* nothing */
3042 			if (p == NULL) {	/* we found a free id */
3043 				break;
3044 			}
3045 		}
3046 		newpeer->template = loose;
3047 		session_template_clone(newpeer, ip, id, 0);
3048 		newpeer->state = newpeer->prev_state = STATE_NONE;
3049 		newpeer->conf.reconf_action = RECONF_KEEP;
3050 		newpeer->rbuf = NULL;
3051 		init_peer(newpeer);
3052 		bgp_fsm(newpeer, EVNT_START);
3053 		newpeer->next = peers;
3054 		peers = newpeer;
3055 		return (newpeer);
3056 	}
3057 
3058 	return (NULL);
3059 }
3060 
3061 void
3062 session_template_clone(struct peer *p, struct sockaddr *ip, u_int32_t id,
3063     u_int32_t as)
3064 {
3065 	struct bgpd_addr	remote_addr;
3066 
3067 	if (ip)
3068 		sa2addr(ip, &remote_addr);
3069 	else
3070 		memcpy(&remote_addr, &p->conf.remote_addr, sizeof(remote_addr));
3071 
3072 	memcpy(&p->conf, &p->template->conf, sizeof(struct peer_config));
3073 
3074 	p->conf.id = id;
3075 
3076 	if (as) {
3077 		p->conf.remote_as = as;
3078 		p->conf.ebgp = (p->conf.remote_as != conf->as);
3079 		if (!p->conf.ebgp)
3080 			/* force enforce_as off for iBGP sessions */
3081 			p->conf.enforce_as = ENFORCE_AS_OFF;
3082 	}
3083 
3084 	memcpy(&p->conf.remote_addr, &remote_addr, sizeof(remote_addr));
3085 	switch (p->conf.remote_addr.aid) {
3086 	case AID_INET:
3087 		p->conf.remote_masklen = 32;
3088 		break;
3089 	case AID_INET6:
3090 		p->conf.remote_masklen = 128;
3091 		break;
3092 	}
3093 	p->conf.template = 0;
3094 }
3095 
3096 int
3097 session_match_mask(struct peer *p, struct bgpd_addr *a)
3098 {
3099 	in_addr_t	 v4mask;
3100 	struct in6_addr	 masked;
3101 
3102 	switch (p->conf.remote_addr.aid) {
3103 	case AID_INET:
3104 		v4mask = htonl(prefixlen2mask(p->conf.remote_masklen));
3105 		if (p->conf.remote_addr.v4.s_addr == (a->v4.s_addr & v4mask))
3106 			return (1);
3107 		return (0);
3108 	case AID_INET6:
3109 		inet6applymask(&masked, &a->v6, p->conf.remote_masklen);
3110 
3111 		if (!memcmp(&masked, &p->conf.remote_addr.v6, sizeof(masked)))
3112 			return (1);
3113 		return (0);
3114 	}
3115 	return (0);
3116 }
3117 
3118 struct peer *
3119 getpeerbyid(u_int32_t peerid)
3120 {
3121 	struct peer *p;
3122 
3123 	/* we might want a more effective way to find peers by IP */
3124 	for (p = peers; p != NULL &&
3125 	    p->conf.id != peerid; p = p->next)
3126 		;	/* nothing */
3127 
3128 	return (p);
3129 }
3130 
3131 void
3132 session_down(struct peer *peer)
3133 {
3134 	bzero(&peer->capa.neg, sizeof(peer->capa.neg));
3135 	peer->stats.last_updown = time(NULL);
3136 	if (imsg_compose(ibuf_rde, IMSG_SESSION_DOWN, peer->conf.id, 0, -1,
3137 	    NULL, 0) == -1)
3138 		fatalx("imsg_compose error");
3139 }
3140 
3141 void
3142 session_up(struct peer *p)
3143 {
3144 	struct session_up	 sup;
3145 
3146 	if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, p->conf.id, 0, -1,
3147 	    &p->conf, sizeof(p->conf)) == -1)
3148 		fatalx("imsg_compose error");
3149 
3150 	sa2addr((struct sockaddr *)&p->sa_local, &sup.local_addr);
3151 	sa2addr((struct sockaddr *)&p->sa_remote, &sup.remote_addr);
3152 
3153 	sup.remote_bgpid = p->remote_bgpid;
3154 	sup.short_as = p->short_as;
3155 	memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa));
3156 	p->stats.last_updown = time(NULL);
3157 	if (imsg_compose(ibuf_rde, IMSG_SESSION_UP, p->conf.id, 0, -1,
3158 	    &sup, sizeof(sup)) == -1)
3159 		fatalx("imsg_compose error");
3160 }
3161 
3162 int
3163 imsg_compose_parent(int type, u_int32_t peerid, pid_t pid, void *data,
3164     u_int16_t datalen)
3165 {
3166 	return (imsg_compose(ibuf_main, type, peerid, pid, -1, data, datalen));
3167 }
3168 
3169 int
3170 imsg_compose_rde(int type, pid_t pid, void *data, u_int16_t datalen)
3171 {
3172 	return (imsg_compose(ibuf_rde, type, 0, pid, -1, data, datalen));
3173 }
3174 
3175 void
3176 session_demote(struct peer *p, int level)
3177 {
3178 	struct demote_msg	msg;
3179 
3180 	strlcpy(msg.demote_group, p->conf.demote_group,
3181 	    sizeof(msg.demote_group));
3182 	msg.level = level;
3183 	if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1,
3184 	    &msg, sizeof(msg)) == -1)
3185 		fatalx("imsg_compose error");
3186 
3187 	p->demoted += level;
3188 }
3189 
3190 void
3191 session_stop(struct peer *peer, u_int8_t subcode)
3192 {
3193 	switch (peer->state) {
3194 	case STATE_OPENSENT:
3195 	case STATE_OPENCONFIRM:
3196 	case STATE_ESTABLISHED:
3197 		session_notification(peer, ERR_CEASE, subcode, NULL, 0);
3198 		break;
3199 	default:
3200 		/* session not open, no need to send notification */
3201 		break;
3202 	}
3203 	bgp_fsm(peer, EVNT_STOP);
3204 }
3205