xref: /openbsd-src/usr.sbin/bgpd/session.c (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1 /*	$OpenBSD: session.c,v 1.354 2016/09/03 16:22:17 renato Exp $ */
2 
3 /*
4  * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 
21 #include <sys/mman.h>
22 #include <sys/socket.h>
23 #include <sys/time.h>
24 #include <sys/resource.h>
25 #include <sys/un.h>
26 #include <net/if_types.h>
27 #include <netinet/in.h>
28 #include <netinet/ip.h>
29 #include <netinet/tcp.h>
30 #include <arpa/inet.h>
31 #include <limits.h>
32 
33 #include <err.h>
34 #include <errno.h>
35 #include <fcntl.h>
36 #include <poll.h>
37 #include <pwd.h>
38 #include <signal.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <unistd.h>
43 
44 #include "bgpd.h"
45 #include "mrt.h"
46 #include "session.h"
47 
/*
 * Fixed slots at the start of the pollfd array built in session_main().
 * Listener sockets start at PFD_LISTENERS_START; peer, mrt and control
 * connection descriptors follow them in that order.
 */
#define PFD_PIPE_MAIN		0	/* imsg pipe to the parent (ibuf_main) */
#define PFD_PIPE_ROUTE		1	/* imsg pipe to the RDE (ibuf_rde) */
#define PFD_PIPE_ROUTE_CTL	2	/* RDE control pipe (ibuf_rde_ctl) */
#define PFD_SOCK_CTL		3	/* control socket (csock) */
#define PFD_SOCK_RCTL		4	/* second control socket (rcsock) */
#define PFD_SOCK_PFKEY		5	/* pfkey socket */
#define PFD_LISTENERS_START	6	/* first listener slot */
55 
/* session engine setup, per-peer state machine helpers and BGP message I/O */
void	session_sighdlr(int);
int	setup_listeners(u_int *);
void	init_conf(struct bgpd_config *);
void	init_peer(struct peer *);
void	start_timer_holdtime(struct peer *);
void	start_timer_keepalive(struct peer *);
void	session_close_connection(struct peer *);
void	change_state(struct peer *, enum session_state, enum session_events);
int	session_setup_socket(struct peer *);
void	session_accept(int);
int	session_connect(struct peer *);
void	session_tcp_established(struct peer *);
void	session_capa_ann_none(struct peer *);
int	session_capa_add(struct ibuf *, u_int8_t, u_int8_t);
int	session_capa_add_mp(struct ibuf *, u_int8_t);
int	session_capa_add_gr(struct peer *, struct ibuf *, u_int8_t);
struct bgp_msg	*session_newmsg(enum msg_type, u_int16_t);
int	session_sendmsg(struct bgp_msg *, struct peer *);
void	session_open(struct peer *);
void	session_keepalive(struct peer *);
void	session_update(u_int32_t, void *, size_t);
void	session_notification(struct peer *, u_int8_t, u_int8_t, void *,
	    ssize_t);
void	session_rrefresh(struct peer *, u_int8_t);
int	session_graceful_restart(struct peer *);
int	session_graceful_stop(struct peer *);
int	session_dispatch_msg(struct pollfd *, struct peer *);
int	session_process_msg(struct peer *);
int	parse_header(struct peer *, u_char *, u_int16_t *, u_int8_t *);
int	parse_open(struct peer *);
int	parse_update(struct peer *);
int	parse_refresh(struct peer *);
int	parse_notification(struct peer *);
int	parse_capabilities(struct peer *, u_char *, u_int16_t, u_int32_t *);
int	capa_neg_calc(struct peer *);
void	session_dispatch_imsg(struct imsgbuf *, int, u_int *);
void	session_up(struct peer *);
void	session_down(struct peer *);
void	session_demote(struct peer *, int);

/* listener comparison and peer lookup helpers */
int		 la_cmp(struct listen_addr *, struct listen_addr *);
struct peer	*getpeerbyip(struct sockaddr *);
void		 session_template_clone(struct peer *, struct sockaddr *,
		    u_int32_t, u_int32_t);
int		 session_match_mask(struct peer *, struct bgpd_addr *);
struct peer	*getpeerbyid(u_int32_t);
102 
/*
 * conf is the configuration the session engine runs with; nconf is
 * presumably the one being received during a reload (not used in the
 * code visible here -- confirm).
 */
struct bgpd_config	*conf, *nconf;
struct bgpd_sysdep	 sysdep;	/* no_md5sig / no_pfkey feature flags */
/* peers is the singly linked list (via ->next) walked by the main loop */
struct peer		*peers, *npeers;
volatile sig_atomic_t	 session_quit;	/* set by session_sighdlr() */
/* while non-zero the main loop skips its peer init/reinit/delete pass */
int			 pending_reconf;
int			 csock = -1, rcsock = -1;	/* control sockets */
u_int			 peer_cnt;	/* number of initialized peers */
struct imsgbuf		*ibuf_rde;	/* imsg channel to the RDE */
struct imsgbuf		*ibuf_rde_ctl;	/* RDE control imsg channel */
struct imsgbuf		*ibuf_main;	/* imsg channel to the parent */

struct mrt_head		 mrthead;	/* list of mrt dumps */
/*
 * getmonotime() stamp taken when accept() ran out of descriptors; 0 means
 * accepting is not paused.
 */
time_t			 pauseaccept;
116 
117 void
118 session_sighdlr(int sig)
119 {
120 	switch (sig) {
121 	case SIGINT:
122 	case SIGTERM:
123 		session_quit = 1;
124 		break;
125 	}
126 }
127 
128 int
129 setup_listeners(u_int *la_cnt)
130 {
131 	int			 ttl = 255;
132 	int			 opt;
133 	struct listen_addr	*la;
134 	u_int			 cnt = 0;
135 
136 	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
137 		la->reconf = RECONF_NONE;
138 		cnt++;
139 
140 		if (la->flags & LISTENER_LISTENING)
141 			continue;
142 
143 		if (la->fd == -1) {
144 			log_warn("cannot establish listener on %s: invalid fd",
145 			    log_sockaddr((struct sockaddr *)&la->sa));
146 			continue;
147 		}
148 
149 		opt = 1;
150 		if (setsockopt(la->fd, IPPROTO_TCP, TCP_MD5SIG,
151 		    &opt, sizeof(opt)) == -1) {
152 			if (errno == ENOPROTOOPT) {	/* system w/o md5sig */
153 				log_warnx("md5sig not available, disabling");
154 				sysdep.no_md5sig = 1;
155 			} else
156 				fatal("setsockopt TCP_MD5SIG");
157 		}
158 
159 		/* set ttl to 255 so that ttl-security works */
160 		if (la->sa.ss_family == AF_INET && setsockopt(la->fd,
161 		    IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) {
162 			log_warn("setup_listeners setsockopt TTL");
163 			continue;
164 		}
165 		if (la->sa.ss_family == AF_INET6 && setsockopt(la->fd,
166 		    IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) == -1) {
167 			log_warn("setup_listeners setsockopt hoplimit");
168 			continue;
169 		}
170 
171 		if (listen(la->fd, MAX_BACKLOG)) {
172 			close(la->fd);
173 			fatal("listen");
174 		}
175 
176 		la->flags |= LISTENER_LISTENING;
177 
178 		log_info("listening on %s",
179 		    log_sockaddr((struct sockaddr *)&la->sa));
180 	}
181 
182 	*la_cnt = cnt;
183 
184 	return (0);
185 }
186 
187 void
188 session_main(int debug, int verbose)
189 {
190 	int			 timeout, pfkeysock;
191 	unsigned int		 i, j, idx_peers, idx_listeners, idx_mrts;
192 	u_int			 pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0;
193 	u_int			 listener_cnt, ctl_cnt, mrt_cnt;
194 	u_int			 new_cnt;
195 	u_int32_t		 ctl_queued;
196 	struct passwd		*pw;
197 	struct peer		*p, **peer_l = NULL, *last, *next;
198 	struct mrt		*m, *xm, **mrt_l = NULL;
199 	struct pollfd		*pfd = NULL;
200 	struct ctl_conn		*ctl_conn;
201 	struct listen_addr	*la;
202 	void			*newp;
203 	short			 events;
204 
205 	bgpd_process = PROC_SE;
206 	log_procname = log_procnames[bgpd_process];
207 
208 	log_init(debug);
209 	log_verbose(verbose);
210 
211 	if ((pw = getpwnam(BGPD_USER)) == NULL)
212 		fatal(NULL);
213 
214 	if (chroot(pw->pw_dir) == -1)
215 		fatal("chroot");
216 	if (chdir("/") == -1)
217 		fatal("chdir(\"/\")");
218 
219 	setproctitle("session engine");
220 	pfkeysock = pfkey_init(&sysdep);
221 
222 	if (setgroups(1, &pw->pw_gid) ||
223 	    setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) ||
224 	    setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
225 		fatal("can't drop privileges");
226 
227 	if (pledge("stdio inet recvfd", NULL) == -1)
228 		fatal("pledge");
229 
230 	signal(SIGTERM, session_sighdlr);
231 	signal(SIGINT, session_sighdlr);
232 	signal(SIGPIPE, SIG_IGN);
233 	signal(SIGHUP, SIG_IGN);
234 	signal(SIGALRM, SIG_IGN);
235 	signal(SIGUSR1, SIG_IGN);
236 
237 	if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL)
238 		fatal(NULL);
239 	imsg_init(ibuf_main, 3);
240 
241 	TAILQ_INIT(&ctl_conns);
242 	LIST_INIT(&mrthead);
243 	listener_cnt = 0;
244 	peer_cnt = 0;
245 	ctl_cnt = 0;
246 
247 	if ((conf = calloc(1, sizeof(struct bgpd_config))) == NULL)
248 		fatal(NULL);
249 	if ((conf->listen_addrs = calloc(1, sizeof(struct listen_addrs))) ==
250 	    NULL)
251 		fatal(NULL);
252 	TAILQ_INIT(conf->listen_addrs);
253 
254 	log_info("session engine ready");
255 
256 	while (session_quit == 0) {
257 		/* check for peers to be initialized or deleted */
258 		last = NULL;
259 		if (!pending_reconf) {
260 			for (p = peers; p != NULL; p = next) {
261 				next = p->next;
262 				/* cloned peer that idled out? */
263 				if (p->template && (p->state == STATE_IDLE ||
264 				    p->state == STATE_ACTIVE) &&
265 				    time(NULL) - p->stats.last_updown >=
266 				    INTERVAL_HOLD_CLONED)
267 					p->conf.reconf_action = RECONF_DELETE;
268 
269 				/* new peer that needs init? */
270 				if (p->state == STATE_NONE)
271 					init_peer(p);
272 
273 				/* reinit due? */
274 				if (p->conf.reconf_action == RECONF_REINIT) {
275 					session_stop(p, ERR_CEASE_ADMIN_RESET);
276 					if (!p->conf.down)
277 						timer_set(p, Timer_IdleHold, 0);
278 				}
279 
280 				/* deletion due? */
281 				if (p->conf.reconf_action == RECONF_DELETE) {
282 					if (p->demoted)
283 						session_demote(p, -1);
284 					p->conf.demote_group[0] = 0;
285 					session_stop(p, ERR_CEASE_PEER_UNCONF);
286 					log_peer_warnx(&p->conf, "removed");
287 					if (last != NULL)
288 						last->next = next;
289 					else
290 						peers = next;
291 					timer_remove_all(p);
292 					free(p);
293 					peer_cnt--;
294 					continue;
295 				}
296 				p->conf.reconf_action = RECONF_NONE;
297 				last = p;
298 			}
299 		}
300 
301 		if (peer_cnt > peer_l_elms) {
302 			if ((newp = reallocarray(peer_l, peer_cnt,
303 			    sizeof(struct peer *))) == NULL) {
304 				/* panic for now  */
305 				log_warn("could not resize peer_l from %u -> %u"
306 				    " entries", peer_l_elms, peer_cnt);
307 				fatalx("exiting");
308 			}
309 			peer_l = newp;
310 			peer_l_elms = peer_cnt;
311 		}
312 
313 		mrt_cnt = 0;
314 		for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) {
315 			xm = LIST_NEXT(m, entry);
316 			if (m->state == MRT_STATE_REMOVE) {
317 				mrt_clean(m);
318 				LIST_REMOVE(m, entry);
319 				free(m);
320 				continue;
321 			}
322 			if (m->wbuf.queued)
323 				mrt_cnt++;
324 		}
325 
326 		if (mrt_cnt > mrt_l_elms) {
327 			if ((newp = reallocarray(mrt_l, mrt_cnt,
328 			    sizeof(struct mrt *))) == NULL) {
329 				/* panic for now  */
330 				log_warn("could not resize mrt_l from %u -> %u"
331 				    " entries", mrt_l_elms, mrt_cnt);
332 				fatalx("exiting");
333 			}
334 			mrt_l = newp;
335 			mrt_l_elms = mrt_cnt;
336 		}
337 
338 		new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt +
339 		    ctl_cnt + mrt_cnt;
340 		if (new_cnt > pfd_elms) {
341 			if ((newp = reallocarray(pfd, new_cnt,
342 			    sizeof(struct pollfd))) == NULL) {
343 				/* panic for now  */
344 				log_warn("could not resize pfd from %u -> %u"
345 				    " entries", pfd_elms, new_cnt);
346 				fatalx("exiting");
347 			}
348 			pfd = newp;
349 			pfd_elms = new_cnt;
350 		}
351 
352 		bzero(pfd, sizeof(struct pollfd) * pfd_elms);
353 
354 		set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main);
355 		set_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde);
356 
357 		ctl_queued = 0;
358 		TAILQ_FOREACH(ctl_conn, &ctl_conns, entry)
359 			ctl_queued += ctl_conn->ibuf.w.queued;
360 
361 		/*
362 		 * Do not act as unlimited buffer. Don't read in more
363 		 * messages if the ctl sockets are getting full.
364 		 */
365 		if (ctl_queued < SESSION_CTL_QUEUE_MAX)
366 			set_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl);
367 
368 		if (pauseaccept == 0) {
369 			pfd[PFD_SOCK_CTL].fd = csock;
370 			pfd[PFD_SOCK_CTL].events = POLLIN;
371 			pfd[PFD_SOCK_RCTL].fd = rcsock;
372 			pfd[PFD_SOCK_RCTL].events = POLLIN;
373 		} else {
374 			pfd[PFD_SOCK_CTL].fd = -1;
375 			pfd[PFD_SOCK_RCTL].fd = -1;
376 		}
377 		pfd[PFD_SOCK_PFKEY].fd = pfkeysock;
378 		pfd[PFD_SOCK_PFKEY].events = POLLIN;
379 
380 		i = PFD_LISTENERS_START;
381 		TAILQ_FOREACH(la, conf->listen_addrs, entry) {
382 			if (pauseaccept == 0) {
383 				pfd[i].fd = la->fd;
384 				pfd[i].events = POLLIN;
385 			} else
386 				pfd[i].fd = -1;
387 			i++;
388 		}
389 		idx_listeners = i;
390 		timeout = 240;	/* loop every 240s at least */
391 
392 		for (p = peers; p != NULL; p = p->next) {
393 			time_t	nextaction;
394 			struct peer_timer *pt;
395 
396 			/* check timers */
397 			if ((pt = timer_nextisdue(p)) != NULL) {
398 				switch (pt->type) {
399 				case Timer_Hold:
400 					bgp_fsm(p, EVNT_TIMER_HOLDTIME);
401 					break;
402 				case Timer_ConnectRetry:
403 					bgp_fsm(p, EVNT_TIMER_CONNRETRY);
404 					break;
405 				case Timer_Keepalive:
406 					bgp_fsm(p, EVNT_TIMER_KEEPALIVE);
407 					break;
408 				case Timer_IdleHold:
409 					bgp_fsm(p, EVNT_START);
410 					break;
411 				case Timer_IdleHoldReset:
412 					p->IdleHoldTime /= 2;
413 					if (p->IdleHoldTime <=
414 					    INTERVAL_IDLE_HOLD_INITIAL) {
415 						p->IdleHoldTime =
416 						    INTERVAL_IDLE_HOLD_INITIAL;
417 						timer_stop(p,
418 						    Timer_IdleHoldReset);
419 						p->errcnt = 0;
420 					} else
421 						timer_set(p,
422 						    Timer_IdleHoldReset,
423 						    p->IdleHoldTime);
424 					break;
425 				case Timer_CarpUndemote:
426 					timer_stop(p, Timer_CarpUndemote);
427 					if (p->demoted &&
428 					    p->state == STATE_ESTABLISHED)
429 						session_demote(p, -1);
430 					break;
431 				case Timer_RestartTimeout:
432 					timer_stop(p, Timer_RestartTimeout);
433 					session_graceful_stop(p);
434 					break;
435 				default:
436 					fatalx("King Bula lost in time");
437 				}
438 			}
439 			if ((nextaction = timer_nextduein(p)) != -1 &&
440 			    nextaction < timeout)
441 				timeout = nextaction;
442 
443 			/* are we waiting for a write? */
444 			events = POLLIN;
445 			if (p->wbuf.queued > 0 || p->state == STATE_CONNECT)
446 				events |= POLLOUT;
447 			/* is there still work to do? */
448 			if (p->rbuf && p->rbuf->wpos)
449 				timeout = 0;
450 
451 			/* poll events */
452 			if (p->fd != -1 && events != 0) {
453 				pfd[i].fd = p->fd;
454 				pfd[i].events = events;
455 				peer_l[i - idx_listeners] = p;
456 				i++;
457 			}
458 		}
459 
460 		idx_peers = i;
461 
462 		LIST_FOREACH(m, &mrthead, entry)
463 			if (m->wbuf.queued) {
464 				pfd[i].fd = m->wbuf.fd;
465 				pfd[i].events = POLLOUT;
466 				mrt_l[i - idx_peers] = m;
467 				i++;
468 			}
469 
470 		idx_mrts = i;
471 
472 		TAILQ_FOREACH(ctl_conn, &ctl_conns, entry) {
473 			pfd[i].fd = ctl_conn->ibuf.fd;
474 			pfd[i].events = POLLIN;
475 			if (ctl_conn->ibuf.w.queued > 0)
476 				pfd[i].events |= POLLOUT;
477 			i++;
478 		}
479 
480 		if (pauseaccept && timeout > 1)
481 			timeout = 1;
482 		if (timeout < 0)
483 			timeout = 0;
484 		if (poll(pfd, i, timeout * 1000) == -1)
485 			if (errno != EINTR)
486 				fatal("poll error");
487 
488 		/*
489 		 * If we previously saw fd exhaustion, we stop accept()
490 		 * for 1 second to throttle the accept() loop.
491 		 */
492 		if (pauseaccept && getmonotime() > pauseaccept + 1)
493 			pauseaccept = 0;
494 
495 		if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) {
496 			log_warnx("SE: Lost connection to parent");
497 			session_quit = 1;
498 			continue;
499 		} else
500 			session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN,
501 			    &listener_cnt);
502 
503 		if (handle_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde) == -1) {
504 			log_warnx("SE: Lost connection to RDE");
505 			msgbuf_clear(&ibuf_rde->w);
506 			free(ibuf_rde);
507 			ibuf_rde = NULL;
508 		} else
509 			session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE,
510 			    &listener_cnt);
511 
512 		if (handle_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl) ==
513 		    -1) {
514 			log_warnx("SE: Lost connection to RDE control");
515 			msgbuf_clear(&ibuf_rde_ctl->w);
516 			free(ibuf_rde_ctl);
517 			ibuf_rde_ctl = NULL;
518 		} else
519 			session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL,
520 			    &listener_cnt);
521 
522 		if (pfd[PFD_SOCK_CTL].revents & POLLIN)
523 			ctl_cnt += control_accept(csock, 0);
524 
525 		if (pfd[PFD_SOCK_RCTL].revents & POLLIN)
526 			ctl_cnt += control_accept(rcsock, 1);
527 
528 		if (pfd[PFD_SOCK_PFKEY].revents & POLLIN) {
529 			if (pfkey_read(pfkeysock, NULL) == -1) {
530 				log_warnx("pfkey_read failed, exiting...");
531 				session_quit = 1;
532 			}
533 		}
534 
535 		for (j = PFD_LISTENERS_START; j < idx_listeners; j++)
536 			if (pfd[j].revents & POLLIN)
537 				session_accept(pfd[j].fd);
538 
539 		for (; j < idx_peers; j++)
540 			session_dispatch_msg(&pfd[j],
541 			    peer_l[j - idx_listeners]);
542 
543 		for (p = peers; p != NULL; p = p->next)
544 			if (p->rbuf && p->rbuf->wpos)
545 				session_process_msg(p);
546 
547 		for (; j < idx_mrts; j++)
548 			if (pfd[j].revents & POLLOUT)
549 				mrt_write(mrt_l[j - idx_peers]);
550 
551 		for (; j < i; j++)
552 			control_dispatch_msg(&pfd[j], &ctl_cnt);
553 	}
554 
555 	/* close pipes */
556 	if (ibuf_rde) {
557 		msgbuf_write(&ibuf_rde->w);
558 		msgbuf_clear(&ibuf_rde->w);
559 		close(ibuf_rde->fd);
560 		free(ibuf_rde);
561 	}
562 	if (ibuf_rde_ctl) {
563 		msgbuf_clear(&ibuf_rde_ctl->w);
564 		close(ibuf_rde_ctl->fd);
565 		free(ibuf_rde_ctl);
566 	}
567 	msgbuf_write(&ibuf_main->w);
568 	msgbuf_clear(&ibuf_main->w);
569 	close(ibuf_main->fd);
570 	free(ibuf_main);
571 
572 	while ((p = peers) != NULL) {
573 		peers = p->next;
574 		session_stop(p, ERR_CEASE_ADMIN_DOWN);
575 		pfkey_remove(p);
576 		free(p);
577 	}
578 
579 	while ((m = LIST_FIRST(&mrthead)) != NULL) {
580 		mrt_clean(m);
581 		LIST_REMOVE(m, entry);
582 		free(m);
583 	}
584 
585 	while ((la = TAILQ_FIRST(conf->listen_addrs)) != NULL) {
586 		TAILQ_REMOVE(conf->listen_addrs, la, entry);
587 		free(la);
588 	}
589 	free(conf->listen_addrs);
590 	free(peer_l);
591 	free(mrt_l);
592 	free(pfd);
593 
594 
595 	control_shutdown(csock);
596 	control_shutdown(rcsock);
597 	log_info("session engine exiting");
598 	exit(0);
599 }
600 
601 void
602 init_conf(struct bgpd_config *c)
603 {
604 	if (!c->holdtime)
605 		c->holdtime = INTERVAL_HOLD;
606 	if (!c->connectretry)
607 		c->connectretry = INTERVAL_CONNECTRETRY;
608 }
609 
610 void
611 init_peer(struct peer *p)
612 {
613 	TAILQ_INIT(&p->timers);
614 	p->fd = p->wbuf.fd = -1;
615 
616 	if (p->conf.if_depend[0])
617 		imsg_compose(ibuf_main, IMSG_IFINFO, 0, 0, -1,
618 		    p->conf.if_depend, sizeof(p->conf.if_depend));
619 	else
620 		p->depend_ok = 1;
621 
622 	peer_cnt++;
623 
624 	change_state(p, STATE_IDLE, EVNT_NONE);
625 	if (p->conf.down)
626 		timer_stop(p, Timer_IdleHold);		/* no autostart */
627 	else
628 		timer_set(p, Timer_IdleHold, 0);	/* start ASAP */
629 
630 	/*
631 	 * on startup, demote if requested.
632 	 * do not handle new peers. they must reach ESTABLISHED beforehands.
633 	 * peers added at runtime have reconf_action set to RECONF_REINIT.
634 	 */
635 	if (p->conf.reconf_action != RECONF_REINIT && p->conf.demote_group[0])
636 		session_demote(p, +1);
637 }
638 
639 void
640 bgp_fsm(struct peer *peer, enum session_events event)
641 {
642 	switch (peer->state) {
643 	case STATE_NONE:
644 		/* nothing */
645 		break;
646 	case STATE_IDLE:
647 		switch (event) {
648 		case EVNT_START:
649 			timer_stop(peer, Timer_Hold);
650 			timer_stop(peer, Timer_Keepalive);
651 			timer_stop(peer, Timer_IdleHold);
652 
653 			/* allocate read buffer */
654 			peer->rbuf = calloc(1, sizeof(struct ibuf_read));
655 			if (peer->rbuf == NULL)
656 				fatal(NULL);
657 
658 			/* init write buffer */
659 			msgbuf_init(&peer->wbuf);
660 
661 			/* init pfkey - remove old if any, load new ones */
662 			pfkey_remove(peer);
663 			if (pfkey_establish(peer) == -1) {
664 				log_peer_warnx(&peer->conf,
665 				    "pfkey setup failed");
666 				return;
667 			}
668 
669 			peer->stats.last_sent_errcode = 0;
670 			peer->stats.last_sent_suberr = 0;
671 
672 			if (!peer->depend_ok)
673 				timer_stop(peer, Timer_ConnectRetry);
674 			else if (peer->passive || peer->conf.passive ||
675 			    peer->conf.template) {
676 				change_state(peer, STATE_ACTIVE, event);
677 				timer_stop(peer, Timer_ConnectRetry);
678 			} else {
679 				change_state(peer, STATE_CONNECT, event);
680 				timer_set(peer, Timer_ConnectRetry,
681 				    conf->connectretry);
682 				session_connect(peer);
683 			}
684 			peer->passive = 0;
685 			break;
686 		default:
687 			/* ignore */
688 			break;
689 		}
690 		break;
691 	case STATE_CONNECT:
692 		switch (event) {
693 		case EVNT_START:
694 			/* ignore */
695 			break;
696 		case EVNT_CON_OPEN:
697 			session_tcp_established(peer);
698 			session_open(peer);
699 			timer_stop(peer, Timer_ConnectRetry);
700 			peer->holdtime = INTERVAL_HOLD_INITIAL;
701 			start_timer_holdtime(peer);
702 			change_state(peer, STATE_OPENSENT, event);
703 			break;
704 		case EVNT_CON_OPENFAIL:
705 			timer_set(peer, Timer_ConnectRetry,
706 			    conf->connectretry);
707 			session_close_connection(peer);
708 			change_state(peer, STATE_ACTIVE, event);
709 			break;
710 		case EVNT_TIMER_CONNRETRY:
711 			timer_set(peer, Timer_ConnectRetry,
712 			    conf->connectretry);
713 			session_connect(peer);
714 			break;
715 		default:
716 			change_state(peer, STATE_IDLE, event);
717 			break;
718 		}
719 		break;
720 	case STATE_ACTIVE:
721 		switch (event) {
722 		case EVNT_START:
723 			/* ignore */
724 			break;
725 		case EVNT_CON_OPEN:
726 			session_tcp_established(peer);
727 			session_open(peer);
728 			timer_stop(peer, Timer_ConnectRetry);
729 			peer->holdtime = INTERVAL_HOLD_INITIAL;
730 			start_timer_holdtime(peer);
731 			change_state(peer, STATE_OPENSENT, event);
732 			break;
733 		case EVNT_CON_OPENFAIL:
734 			timer_set(peer, Timer_ConnectRetry,
735 			    conf->connectretry);
736 			session_close_connection(peer);
737 			change_state(peer, STATE_ACTIVE, event);
738 			break;
739 		case EVNT_TIMER_CONNRETRY:
740 			timer_set(peer, Timer_ConnectRetry,
741 			    peer->holdtime);
742 			change_state(peer, STATE_CONNECT, event);
743 			session_connect(peer);
744 			break;
745 		default:
746 			change_state(peer, STATE_IDLE, event);
747 			break;
748 		}
749 		break;
750 	case STATE_OPENSENT:
751 		switch (event) {
752 		case EVNT_START:
753 			/* ignore */
754 			break;
755 		case EVNT_STOP:
756 			change_state(peer, STATE_IDLE, event);
757 			break;
758 		case EVNT_CON_CLOSED:
759 			session_close_connection(peer);
760 			timer_set(peer, Timer_ConnectRetry,
761 			    conf->connectretry);
762 			change_state(peer, STATE_ACTIVE, event);
763 			break;
764 		case EVNT_CON_FATAL:
765 			change_state(peer, STATE_IDLE, event);
766 			break;
767 		case EVNT_TIMER_HOLDTIME:
768 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
769 			    0, NULL, 0);
770 			change_state(peer, STATE_IDLE, event);
771 			break;
772 		case EVNT_RCVD_OPEN:
773 			/* parse_open calls change_state itself on failure */
774 			if (parse_open(peer))
775 				break;
776 			session_keepalive(peer);
777 			change_state(peer, STATE_OPENCONFIRM, event);
778 			break;
779 		case EVNT_RCVD_NOTIFICATION:
780 			if (parse_notification(peer)) {
781 				change_state(peer, STATE_IDLE, event);
782 				/* don't punish, capa negotiation */
783 				timer_set(peer, Timer_IdleHold, 0);
784 				peer->IdleHoldTime /= 2;
785 			} else
786 				change_state(peer, STATE_IDLE, event);
787 			break;
788 		default:
789 			session_notification(peer,
790 			    ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL, 0);
791 			change_state(peer, STATE_IDLE, event);
792 			break;
793 		}
794 		break;
795 	case STATE_OPENCONFIRM:
796 		switch (event) {
797 		case EVNT_START:
798 			/* ignore */
799 			break;
800 		case EVNT_STOP:
801 			change_state(peer, STATE_IDLE, event);
802 			break;
803 		case EVNT_CON_CLOSED:
804 		case EVNT_CON_FATAL:
805 			change_state(peer, STATE_IDLE, event);
806 			break;
807 		case EVNT_TIMER_HOLDTIME:
808 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
809 			    0, NULL, 0);
810 			change_state(peer, STATE_IDLE, event);
811 			break;
812 		case EVNT_TIMER_KEEPALIVE:
813 			session_keepalive(peer);
814 			break;
815 		case EVNT_RCVD_KEEPALIVE:
816 			start_timer_holdtime(peer);
817 			change_state(peer, STATE_ESTABLISHED, event);
818 			break;
819 		case EVNT_RCVD_NOTIFICATION:
820 			parse_notification(peer);
821 			change_state(peer, STATE_IDLE, event);
822 			break;
823 		default:
824 			session_notification(peer,
825 			    ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL, 0);
826 			change_state(peer, STATE_IDLE, event);
827 			break;
828 		}
829 		break;
830 	case STATE_ESTABLISHED:
831 		switch (event) {
832 		case EVNT_START:
833 			/* ignore */
834 			break;
835 		case EVNT_STOP:
836 			change_state(peer, STATE_IDLE, event);
837 			break;
838 		case EVNT_CON_CLOSED:
839 		case EVNT_CON_FATAL:
840 			change_state(peer, STATE_IDLE, event);
841 			break;
842 		case EVNT_TIMER_HOLDTIME:
843 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
844 			    0, NULL, 0);
845 			change_state(peer, STATE_IDLE, event);
846 			break;
847 		case EVNT_TIMER_KEEPALIVE:
848 			session_keepalive(peer);
849 			break;
850 		case EVNT_RCVD_KEEPALIVE:
851 			start_timer_holdtime(peer);
852 			break;
853 		case EVNT_RCVD_UPDATE:
854 			start_timer_holdtime(peer);
855 			if (parse_update(peer))
856 				change_state(peer, STATE_IDLE, event);
857 			else
858 				start_timer_holdtime(peer);
859 			break;
860 		case EVNT_RCVD_NOTIFICATION:
861 			parse_notification(peer);
862 			change_state(peer, STATE_IDLE, event);
863 			break;
864 		default:
865 			session_notification(peer,
866 			    ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL, 0);
867 			change_state(peer, STATE_IDLE, event);
868 			break;
869 		}
870 		break;
871 	}
872 }
873 
874 void
875 start_timer_holdtime(struct peer *peer)
876 {
877 	if (peer->holdtime > 0)
878 		timer_set(peer, Timer_Hold, peer->holdtime);
879 	else
880 		timer_stop(peer, Timer_Hold);
881 }
882 
883 void
884 start_timer_keepalive(struct peer *peer)
885 {
886 	if (peer->holdtime > 0)
887 		timer_set(peer, Timer_Keepalive, peer->holdtime / 3);
888 	else
889 		timer_stop(peer, Timer_Keepalive);
890 }
891 
892 void
893 session_close_connection(struct peer *peer)
894 {
895 	if (peer->fd != -1) {
896 		close(peer->fd);
897 		pauseaccept = 0;
898 	}
899 	peer->fd = peer->wbuf.fd = -1;
900 }
901 
/*
 * Move `peer' to `state' and perform the side effects that belong to
 * entering that state.  peer->state still holds the old state while the
 * per-state work runs; it is only updated (and the old value saved in
 * prev_state) at the very end, after the transition has been logged and
 * handed to the matching mrt dumps.
 *
 * peer  - peer changing state
 * state - state being entered
 * event - event that caused the transition
 */
void
change_state(struct peer *peer, enum session_state state,
    enum session_events event)
{
	struct mrt	*mrt;

	switch (state) {
	case STATE_IDLE:
		/* carp demotion first. new peers handled in init_peer */
		if (peer->state == STATE_ESTABLISHED &&
		    peer->conf.demote_group[0] && !peer->demoted)
			session_demote(peer, +1);

		/*
		 * try to write out what's buffered (maybe a notification),
		 * don't bother if it fails
		 */
		if (peer->state >= STATE_OPENSENT && peer->wbuf.queued)
			msgbuf_write(&peer->wbuf);

		/*
		 * we must start the timer for the next EVNT_START
		 * if we are coming here due to an error and the
		 * session was not established successfully before, the
		 * starttimerinterval needs to be exponentially increased
		 */
		if (peer->IdleHoldTime == 0)
			peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL;
		peer->holdtime = INTERVAL_HOLD_INITIAL;
		/* stop all session timers and tear the connection down */
		timer_stop(peer, Timer_ConnectRetry);
		timer_stop(peer, Timer_Keepalive);
		timer_stop(peer, Timer_Hold);
		timer_stop(peer, Timer_IdleHold);
		timer_stop(peer, Timer_IdleHoldReset);
		session_close_connection(peer);
		msgbuf_clear(&peer->wbuf);
		free(peer->rbuf);
		peer->rbuf = NULL;
		/* forget the capabilities the peer sent */
		bzero(&peer->capa.peer, sizeof(peer->capa.peer));

		/*
		 * Unless stopped on purpose, schedule the next start after
		 * the current idle hold time; for real events the hold time
		 * is then doubled for the following attempt, as long as it
		 * is below MAX_IDLE_HOLD/2.
		 */
		if (event != EVNT_STOP) {
			timer_set(peer, Timer_IdleHold, peer->IdleHoldTime);
			if (event != EVNT_NONE &&
			    peer->IdleHoldTime < MAX_IDLE_HOLD/2)
				peer->IdleHoldTime *= 2;
		}
		if (peer->state == STATE_ESTABLISHED) {
			if (peer->capa.neg.grestart.restart == 2 &&
			    (event == EVNT_CON_CLOSED ||
			    event == EVNT_CON_FATAL)) {
				/* don't punish graceful restart */
				timer_set(peer, Timer_IdleHold, 0);
				peer->IdleHoldTime /= 2;
				session_graceful_restart(peer);
			} else
				session_down(peer);
		}
		if (peer->state == STATE_NONE ||
		    peer->state == STATE_ESTABLISHED) {
			/* initialize capability negotiation structures */
			memcpy(&peer->capa.ann, &peer->conf.capabilities,
			    sizeof(peer->capa.ann));
			if (!peer->conf.announce_capa)
				session_capa_ann_none(peer);
		}
		break;
	case STATE_CONNECT:
		/*
		 * Entering CONNECT straight from ESTABLISHED only happens
		 * for a graceful restart; reset the session without going
		 * through IDLE.  The read buffer is kept.
		 */
		if (peer->state == STATE_ESTABLISHED &&
		    peer->capa.neg.grestart.restart == 2) {
			/* do the graceful restart dance */
			session_graceful_restart(peer);
			peer->holdtime = INTERVAL_HOLD_INITIAL;
			timer_stop(peer, Timer_ConnectRetry);
			timer_stop(peer, Timer_Keepalive);
			timer_stop(peer, Timer_Hold);
			timer_stop(peer, Timer_IdleHold);
			timer_stop(peer, Timer_IdleHoldReset);
			session_close_connection(peer);
			msgbuf_clear(&peer->wbuf);
			bzero(&peer->capa.peer, sizeof(peer->capa.peer));
		}
		break;
	case STATE_ACTIVE:
		break;
	case STATE_OPENSENT:
		break;
	case STATE_OPENCONFIRM:
		break;
	case STATE_ESTABLISHED:
		/* start decaying the idle hold time again */
		timer_set(peer, Timer_IdleHoldReset, peer->IdleHoldTime);
		if (peer->demoted)
			timer_set(peer, Timer_CarpUndemote,
			    INTERVAL_HOLD_DEMOTED);
		session_up(peer);
		break;
	default:		/* unexpected target state: no per-state work */
		break;
	}

	log_statechange(peer, state, event);
	/*
	 * Report the transition to every all-in/all-out mrt dump that is
	 * either unfiltered or matches this peer's id or group id.
	 */
	LIST_FOREACH(mrt, &mrthead, entry) {
		if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT))
			continue;
		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
		    mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
		    mrt->group_id == peer->conf.groupid))
			mrt_dump_state(mrt, peer->state, state, peer);
	}
	peer->prev_state = peer->state;
	peer->state = state;
}
1013 
1014 void
1015 session_accept(int listenfd)
1016 {
1017 	int			 connfd;
1018 	int			 opt;
1019 	socklen_t		 len;
1020 	struct sockaddr_storage	 cliaddr;
1021 	struct peer		*p = NULL;
1022 
1023 	len = sizeof(cliaddr);
1024 	if ((connfd = accept4(listenfd,
1025 	    (struct sockaddr *)&cliaddr, &len,
1026 	    SOCK_CLOEXEC | SOCK_NONBLOCK)) == -1) {
1027 		if (errno == ENFILE || errno == EMFILE)
1028 			pauseaccept = getmonotime();
1029 		else if (errno != EWOULDBLOCK && errno != EINTR &&
1030 		    errno != ECONNABORTED)
1031 			log_warn("accept");
1032 		return;
1033 	}
1034 
1035 	p = getpeerbyip((struct sockaddr *)&cliaddr);
1036 
1037 	if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) {
1038 		if (timer_running(p, Timer_IdleHold, NULL)) {
1039 			/* fast reconnect after clear */
1040 			p->passive = 1;
1041 			bgp_fsm(p, EVNT_START);
1042 		}
1043 	}
1044 
1045 	if (p != NULL &&
1046 	    (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) {
1047 		if (p->fd != -1) {
1048 			if (p->state == STATE_CONNECT)
1049 				session_close_connection(p);
1050 			else {
1051 				close(connfd);
1052 				return;
1053 			}
1054 		}
1055 
1056 open:
1057 		if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1058 			log_peer_warnx(&p->conf,
1059 			    "ipsec or md5sig configured but not available");
1060 			close(connfd);
1061 			return;
1062 		}
1063 
1064 		if (p->conf.auth.method == AUTH_MD5SIG) {
1065 			if (sysdep.no_md5sig) {
1066 				log_peer_warnx(&p->conf,
1067 				    "md5sig configured but not available");
1068 				close(connfd);
1069 				return;
1070 			}
1071 			len = sizeof(opt);
1072 			if (getsockopt(connfd, IPPROTO_TCP, TCP_MD5SIG,
1073 			    &opt, &len) == -1)
1074 				fatal("getsockopt TCP_MD5SIG");
1075 			if (!opt) {	/* non-md5'd connection! */
1076 				log_peer_warnx(&p->conf,
1077 				    "connection attempt without md5 signature");
1078 				close(connfd);
1079 				return;
1080 			}
1081 		}
1082 		p->fd = p->wbuf.fd = connfd;
1083 		if (session_setup_socket(p)) {
1084 			close(connfd);
1085 			return;
1086 		}
1087 		bgp_fsm(p, EVNT_CON_OPEN);
1088 		return;
1089 	} else if (p != NULL && p->state == STATE_ESTABLISHED &&
1090 	    p->capa.neg.grestart.restart == 2) {
1091 		/* first do the graceful restart dance */
1092 		change_state(p, STATE_CONNECT, EVNT_CON_CLOSED);
1093 		/* then do part of the open dance */
1094 		goto open;
1095 	} else {
1096 		log_conn_attempt(p, (struct sockaddr *)&cliaddr);
1097 		close(connfd);
1098 	}
1099 }
1100 
1101 int
1102 session_connect(struct peer *peer)
1103 {
1104 	int			 opt = 1;
1105 	struct sockaddr		*sa;
1106 
1107 	/*
1108 	 * we do not need the overcomplicated collision detection RFC 1771
1109 	 * describes; we simply make sure there is only ever one concurrent
1110 	 * tcp connection per peer.
1111 	 */
1112 	if (peer->fd != -1)
1113 		return (-1);
1114 
1115 	if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid),
1116 	    SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP)) == -1) {
1117 		log_peer_warn(&peer->conf, "session_connect socket");
1118 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1119 		return (-1);
1120 	}
1121 
1122 	if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1123 		log_peer_warnx(&peer->conf,
1124 		    "ipsec or md5sig configured but not available");
1125 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1126 		return (-1);
1127 	}
1128 
1129 	if (peer->conf.auth.method == AUTH_MD5SIG) {
1130 		if (sysdep.no_md5sig) {
1131 			log_peer_warnx(&peer->conf,
1132 			    "md5sig configured but not available");
1133 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1134 			return (-1);
1135 		}
1136 		if (setsockopt(peer->fd, IPPROTO_TCP, TCP_MD5SIG,
1137 		    &opt, sizeof(opt)) == -1) {
1138 			log_peer_warn(&peer->conf, "setsockopt md5sig");
1139 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1140 			return (-1);
1141 		}
1142 	}
1143 	peer->wbuf.fd = peer->fd;
1144 
1145 	/* if update source is set we need to bind() */
1146 	if ((sa = addr2sa(&peer->conf.local_addr, 0)) != NULL) {
1147 		if (bind(peer->fd, sa, sa->sa_len) == -1) {
1148 			log_peer_warn(&peer->conf, "session_connect bind");
1149 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1150 			return (-1);
1151 		}
1152 	}
1153 
1154 	if (session_setup_socket(peer)) {
1155 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1156 		return (-1);
1157 	}
1158 
1159 	sa = addr2sa(&peer->conf.remote_addr, BGP_PORT);
1160 	if (connect(peer->fd, sa, sa->sa_len) == -1) {
1161 		if (errno != EINPROGRESS) {
1162 			if (errno != peer->lasterr)
1163 				log_peer_warn(&peer->conf, "connect");
1164 			peer->lasterr = errno;
1165 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1166 			return (-1);
1167 		}
1168 	} else
1169 		bgp_fsm(peer, EVNT_CON_OPEN);
1170 
1171 	return (0);
1172 }
1173 
1174 int
1175 session_setup_socket(struct peer *p)
1176 {
1177 	int	ttl = p->conf.distance;
1178 	int	pre = IPTOS_PREC_INTERNETCONTROL;
1179 	int	nodelay = 1;
1180 	int	bsize;
1181 
1182 	switch (p->conf.remote_addr.aid) {
1183 	case AID_INET:
1184 		/* set precedence, see RFC 1771 appendix 5 */
1185 		if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) ==
1186 		    -1) {
1187 			log_peer_warn(&p->conf,
1188 			    "session_setup_socket setsockopt TOS");
1189 			return (-1);
1190 		}
1191 
1192 		if (p->conf.ebgp) {
1193 			/* set TTL to foreign router's distance
1194 			   1=direct n=multihop with ttlsec, we always use 255 */
1195 			if (p->conf.ttlsec) {
1196 				ttl = 256 - p->conf.distance;
1197 				if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL,
1198 				    &ttl, sizeof(ttl)) == -1) {
1199 					log_peer_warn(&p->conf,
1200 					    "session_setup_socket: "
1201 					    "setsockopt MINTTL");
1202 					return (-1);
1203 				}
1204 				ttl = 255;
1205 			}
1206 
1207 			if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl,
1208 			    sizeof(ttl)) == -1) {
1209 				log_peer_warn(&p->conf,
1210 				    "session_setup_socket setsockopt TTL");
1211 				return (-1);
1212 			}
1213 		}
1214 		break;
1215 	case AID_INET6:
1216 		if (p->conf.ebgp) {
1217 			/* set hoplimit to foreign router's distance
1218 			   1=direct n=multihop with ttlsec, we always use 255 */
1219 			if (p->conf.ttlsec) {
1220 				ttl = 256 - p->conf.distance;
1221 				if (setsockopt(p->fd, IPPROTO_IPV6,
1222 				    IPV6_MINHOPCOUNT, &ttl, sizeof(ttl))
1223 				    == -1) {
1224 					log_peer_warn(&p->conf,
1225 					    "session_setup_socket: "
1226 					    "setsockopt MINHOPCOUNT");
1227 					return (-1);
1228 				}
1229 				ttl = 255;
1230 			}
1231 			if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
1232 			    &ttl, sizeof(ttl)) == -1) {
1233 				log_peer_warn(&p->conf,
1234 				    "session_setup_socket setsockopt hoplimit");
1235 				return (-1);
1236 			}
1237 		}
1238 		break;
1239 	}
1240 
1241 	/* set TCP_NODELAY */
1242 	if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay,
1243 	    sizeof(nodelay)) == -1) {
1244 		log_peer_warn(&p->conf,
1245 		    "session_setup_socket setsockopt TCP_NODELAY");
1246 		return (-1);
1247 	}
1248 
1249 	/* only increase bufsize (and thus window) if md5 or ipsec is in use */
1250 	if (p->conf.auth.method != AUTH_NONE) {
1251 		/* try to increase bufsize. no biggie if it fails */
1252 		bsize = 65535;
1253 		while (bsize > 8192 &&
1254 		    setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize,
1255 		    sizeof(bsize)) == -1 && errno != EINVAL)
1256 			bsize /= 2;
1257 		bsize = 65535;
1258 		while (bsize > 8192 &&
1259 		    setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize,
1260 		    sizeof(bsize)) == -1 && errno != EINVAL)
1261 			bsize /= 2;
1262 	}
1263 
1264 	return (0);
1265 }
1266 
1267 void
1268 session_tcp_established(struct peer *peer)
1269 {
1270 	socklen_t	len;
1271 
1272 	len = sizeof(peer->sa_local);
1273 	if (getsockname(peer->fd, (struct sockaddr *)&peer->sa_local,
1274 	    &len) == -1)
1275 		log_warn("getsockname");
1276 	len = sizeof(peer->sa_remote);
1277 	if (getpeername(peer->fd, (struct sockaddr *)&peer->sa_remote,
1278 	    &len) == -1)
1279 		log_warn("getpeername");
1280 }
1281 
1282 void
1283 session_capa_ann_none(struct peer *peer)
1284 {
1285 	bzero(&peer->capa.ann, sizeof(peer->capa.ann));
1286 }
1287 
/*
 * Append a capability header (one byte code, one byte length) to opb.
 * Returns the sum of the ibuf_add() results, i.e. 0 when both succeeded.
 */
int
session_capa_add(struct ibuf *opb, u_int8_t capa_code, u_int8_t capa_len)
{
	int	failed;

	failed = ibuf_add(opb, &capa_code, sizeof(capa_code));
	failed += ibuf_add(opb, &capa_len, sizeof(capa_len));

	return (failed);
}
1297 
/*
 * Append the 4 byte multiprotocol capability value for aid to buf:
 * AFI in network byte order, one zero byte, SAFI.
 * An aid that cannot be mapped to an AFI/SAFI pair is fatal.
 * Returns the sum of the ibuf_add() results, i.e. 0 on success.
 */
int
session_capa_add_mp(struct ibuf *buf, u_int8_t aid)
{
	u_int16_t	 afi;
	u_int8_t	 safi;
	u_int8_t	 reserved = 0;
	int		 failed;

	if (aid2afi(aid, &afi, &safi) == -1)
		fatalx("session_capa_add_mp: bad afi/safi pair");

	afi = htons(afi);
	failed = ibuf_add(buf, &afi, sizeof(afi));
	failed += ibuf_add(buf, &reserved, sizeof(reserved));
	failed += ibuf_add(buf, &safi, sizeof(safi));

	return (failed);
}
1314 
1315 int
1316 session_capa_add_gr(struct peer *p, struct ibuf *b, u_int8_t aid)
1317 {
1318 	u_int		errs = 0;
1319 	u_int16_t	afi;
1320 	u_int8_t	flags, safi;
1321 
1322 	if (aid2afi(aid, &afi, &safi)) {
1323 		log_warn("session_capa_add_gr: bad AID");
1324 		return (1);
1325 	}
1326 	if (p->capa.neg.grestart.flags[aid] & CAPA_GR_RESTARTING)
1327 		flags = CAPA_GR_F_FLAG;
1328 	else
1329 		flags = 0;
1330 
1331 	afi = htons(afi);
1332 	errs += ibuf_add(b, &afi, sizeof(afi));
1333 	errs += ibuf_add(b, &safi, sizeof(safi));
1334 	errs += ibuf_add(b, &flags, sizeof(flags));
1335 
1336 	return (errs);
1337 }
1338 
1339 struct bgp_msg *
1340 session_newmsg(enum msg_type msgtype, u_int16_t len)
1341 {
1342 	struct bgp_msg		*msg;
1343 	struct msg_header	 hdr;
1344 	struct ibuf		*buf;
1345 	int			 errs = 0;
1346 
1347 	memset(&hdr.marker, 0xff, sizeof(hdr.marker));
1348 	hdr.len = htons(len);
1349 	hdr.type = msgtype;
1350 
1351 	if ((buf = ibuf_open(len)) == NULL)
1352 		return (NULL);
1353 
1354 	errs += ibuf_add(buf, &hdr.marker, sizeof(hdr.marker));
1355 	errs += ibuf_add(buf, &hdr.len, sizeof(hdr.len));
1356 	errs += ibuf_add(buf, &hdr.type, sizeof(hdr.type));
1357 
1358 	if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) {
1359 		ibuf_free(buf);
1360 		return (NULL);
1361 	}
1362 
1363 	msg->buf = buf;
1364 	msg->type = msgtype;
1365 	msg->len = len;
1366 
1367 	return (msg);
1368 }
1369 
1370 int
1371 session_sendmsg(struct bgp_msg *msg, struct peer *p)
1372 {
1373 	struct mrt		*mrt;
1374 
1375 	LIST_FOREACH(mrt, &mrthead, entry) {
1376 		if (!(mrt->type == MRT_ALL_OUT || (msg->type == UPDATE &&
1377 		    mrt->type == MRT_UPDATE_OUT)))
1378 			continue;
1379 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1380 		    mrt->peer_id == p->conf.id || (mrt->group_id == 0 &&
1381 		    mrt->group_id == p->conf.groupid))
1382 			mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p);
1383 	}
1384 
1385 	ibuf_close(&p->wbuf, msg->buf);
1386 	free(msg);
1387 	return (0);
1388 }
1389 
1390 void
1391 session_open(struct peer *p)
1392 {
1393 	struct bgp_msg		*buf;
1394 	struct ibuf		*opb;
1395 	struct msg_open		 msg;
1396 	u_int16_t		 len;
1397 	u_int8_t		 i, op_type, optparamlen = 0;
1398 	int			 errs = 0;
1399 	int			 mpcapa = 0;
1400 
1401 
1402 	if ((opb = ibuf_dynamic(0, UCHAR_MAX - sizeof(op_type) -
1403 	    sizeof(optparamlen))) == NULL) {
1404 		bgp_fsm(p, EVNT_CON_FATAL);
1405 		return;
1406 	}
1407 
1408 	/* multiprotocol extensions, RFC 4760 */
1409 	for (i = 0; i < AID_MAX; i++)
1410 		if (p->capa.ann.mp[i]) {	/* 4 bytes data */
1411 			errs += session_capa_add(opb, CAPA_MP, 4);
1412 			errs += session_capa_add_mp(opb, i);
1413 			mpcapa++;
1414 		}
1415 
1416 	/* route refresh, RFC 2918 */
1417 	if (p->capa.ann.refresh)	/* no data */
1418 		errs += session_capa_add(opb, CAPA_REFRESH, 0);
1419 
1420 	/* graceful restart and End-of-RIB marker, RFC 4724 */
1421 	if (p->capa.ann.grestart.restart) {
1422 		int		rst = 0;
1423 		u_int16_t	hdr;
1424 		u_int8_t	grlen;
1425 
1426 		if (mpcapa) {
1427 			grlen = 2 + 4 * mpcapa;
1428 			for (i = 0; i < AID_MAX; i++) {
1429 				if (p->capa.neg.grestart.flags[i] &
1430 				    CAPA_GR_RESTARTING)
1431 					rst++;
1432 			}
1433 		} else {	/* AID_INET */
1434 			grlen = 2 + 4;
1435 			if (p->capa.neg.grestart.flags[AID_INET] &
1436 			    CAPA_GR_RESTARTING)
1437 				rst++;
1438 		}
1439 
1440 		hdr = conf->holdtime;		/* default timeout */
1441 		/* if client does graceful restart don't set R flag */
1442 		if (!rst)
1443 			hdr |= CAPA_GR_R_FLAG;
1444 		hdr = htons(hdr);
1445 
1446 		errs += session_capa_add(opb, CAPA_RESTART, grlen);
1447 		errs += ibuf_add(opb, &hdr, sizeof(hdr));
1448 
1449 		if (mpcapa) {
1450 			for (i = 0; i < AID_MAX; i++) {
1451 				if (p->capa.ann.mp[i]) {
1452 					errs += session_capa_add_gr(p, opb, i);
1453 				}
1454 			}
1455 		} else {	/* AID_INET */
1456 			errs += session_capa_add_gr(p, opb, AID_INET);
1457 		}
1458 	}
1459 
1460 	/* 4-bytes AS numbers, draft-ietf-idr-as4bytes-13 */
1461 	if (p->capa.ann.as4byte) {	/* 4 bytes data */
1462 		u_int32_t	nas;
1463 
1464 		nas = htonl(conf->as);
1465 		errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(nas));
1466 		errs += ibuf_add(opb, &nas, sizeof(nas));
1467 	}
1468 
1469 	if (ibuf_size(opb))
1470 		optparamlen = ibuf_size(opb) + sizeof(op_type) +
1471 		    sizeof(optparamlen);
1472 
1473 	len = MSGSIZE_OPEN_MIN + optparamlen;
1474 	if (errs || (buf = session_newmsg(OPEN, len)) == NULL) {
1475 		ibuf_free(opb);
1476 		bgp_fsm(p, EVNT_CON_FATAL);
1477 		return;
1478 	}
1479 
1480 	msg.version = 4;
1481 	msg.myas = htons(conf->short_as);
1482 	if (p->conf.holdtime)
1483 		msg.holdtime = htons(p->conf.holdtime);
1484 	else
1485 		msg.holdtime = htons(conf->holdtime);
1486 	msg.bgpid = conf->bgpid;	/* is already in network byte order */
1487 	msg.optparamlen = optparamlen;
1488 
1489 	errs += ibuf_add(buf->buf, &msg.version, sizeof(msg.version));
1490 	errs += ibuf_add(buf->buf, &msg.myas, sizeof(msg.myas));
1491 	errs += ibuf_add(buf->buf, &msg.holdtime, sizeof(msg.holdtime));
1492 	errs += ibuf_add(buf->buf, &msg.bgpid, sizeof(msg.bgpid));
1493 	errs += ibuf_add(buf->buf, &msg.optparamlen, sizeof(msg.optparamlen));
1494 
1495 	if (optparamlen) {
1496 		op_type = OPT_PARAM_CAPABILITIES;
1497 		optparamlen = ibuf_size(opb);
1498 		errs += ibuf_add(buf->buf, &op_type, sizeof(op_type));
1499 		errs += ibuf_add(buf->buf, &optparamlen, sizeof(optparamlen));
1500 		errs += ibuf_add(buf->buf, opb->buf, ibuf_size(opb));
1501 	}
1502 
1503 	ibuf_free(opb);
1504 
1505 	if (errs) {
1506 		ibuf_free(buf->buf);
1507 		free(buf);
1508 		bgp_fsm(p, EVNT_CON_FATAL);
1509 		return;
1510 	}
1511 
1512 	if (session_sendmsg(buf, p) == -1) {
1513 		bgp_fsm(p, EVNT_CON_FATAL);
1514 		return;
1515 	}
1516 
1517 	p->stats.msg_sent_open++;
1518 }
1519 
1520 void
1521 session_keepalive(struct peer *p)
1522 {
1523 	struct bgp_msg		*buf;
1524 
1525 	if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL ||
1526 	    session_sendmsg(buf, p) == -1) {
1527 		bgp_fsm(p, EVNT_CON_FATAL);
1528 		return;
1529 	}
1530 
1531 	start_timer_keepalive(p);
1532 	p->stats.msg_sent_keepalive++;
1533 }
1534 
1535 void
1536 session_update(u_int32_t peerid, void *data, size_t datalen)
1537 {
1538 	struct peer		*p;
1539 	struct bgp_msg		*buf;
1540 
1541 	if ((p = getpeerbyid(peerid)) == NULL) {
1542 		log_warnx("no such peer: id=%u", peerid);
1543 		return;
1544 	}
1545 
1546 	if (p->state != STATE_ESTABLISHED)
1547 		return;
1548 
1549 	if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + datalen)) == NULL) {
1550 		bgp_fsm(p, EVNT_CON_FATAL);
1551 		return;
1552 	}
1553 
1554 	if (ibuf_add(buf->buf, data, datalen)) {
1555 		ibuf_free(buf->buf);
1556 		free(buf);
1557 		bgp_fsm(p, EVNT_CON_FATAL);
1558 		return;
1559 	}
1560 
1561 	if (session_sendmsg(buf, p) == -1) {
1562 		bgp_fsm(p, EVNT_CON_FATAL);
1563 		return;
1564 	}
1565 
1566 	start_timer_keepalive(p);
1567 	p->stats.msg_sent_update++;
1568 }
1569 
1570 void
1571 session_notification(struct peer *p, u_int8_t errcode, u_int8_t subcode,
1572     void *data, ssize_t datalen)
1573 {
1574 	struct bgp_msg		*buf;
1575 	int			 errs = 0;
1576 
1577 	if (p->stats.last_sent_errcode)	/* some notification already sent */
1578 		return;
1579 
1580 	log_notification(p, errcode, subcode, data, datalen, "sending");
1581 
1582 	if ((buf = session_newmsg(NOTIFICATION,
1583 	    MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) {
1584 		bgp_fsm(p, EVNT_CON_FATAL);
1585 		return;
1586 	}
1587 
1588 	errs += ibuf_add(buf->buf, &errcode, sizeof(errcode));
1589 	errs += ibuf_add(buf->buf, &subcode, sizeof(subcode));
1590 
1591 	if (datalen > 0)
1592 		errs += ibuf_add(buf->buf, data, datalen);
1593 
1594 	if (errs) {
1595 		ibuf_free(buf->buf);
1596 		free(buf);
1597 		bgp_fsm(p, EVNT_CON_FATAL);
1598 		return;
1599 	}
1600 
1601 	if (session_sendmsg(buf, p) == -1) {
1602 		bgp_fsm(p, EVNT_CON_FATAL);
1603 		return;
1604 	}
1605 
1606 	p->stats.msg_sent_notification++;
1607 	p->stats.last_sent_errcode = errcode;
1608 	p->stats.last_sent_suberr = subcode;
1609 }
1610 
1611 int
1612 session_neighbor_rrefresh(struct peer *p)
1613 {
1614 	u_int8_t	i;
1615 
1616 	if (!p->capa.peer.refresh)
1617 		return (-1);
1618 
1619 	for (i = 0; i < AID_MAX; i++) {
1620 		if (p->capa.peer.mp[i] != 0)
1621 			session_rrefresh(p, i);
1622 	}
1623 
1624 	return (0);
1625 }
1626 
1627 void
1628 session_rrefresh(struct peer *p, u_int8_t aid)
1629 {
1630 	struct bgp_msg		*buf;
1631 	int			 errs = 0;
1632 	u_int16_t		 afi;
1633 	u_int8_t		 safi, null8 = 0;
1634 
1635 	if (aid2afi(aid, &afi, &safi) == -1)
1636 		fatalx("session_rrefresh: bad afi/safi pair");
1637 
1638 	if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) {
1639 		bgp_fsm(p, EVNT_CON_FATAL);
1640 		return;
1641 	}
1642 
1643 	afi = htons(afi);
1644 	errs += ibuf_add(buf->buf, &afi, sizeof(afi));
1645 	errs += ibuf_add(buf->buf, &null8, sizeof(null8));
1646 	errs += ibuf_add(buf->buf, &safi, sizeof(safi));
1647 
1648 	if (errs) {
1649 		ibuf_free(buf->buf);
1650 		free(buf);
1651 		bgp_fsm(p, EVNT_CON_FATAL);
1652 		return;
1653 	}
1654 
1655 	if (session_sendmsg(buf, p) == -1) {
1656 		bgp_fsm(p, EVNT_CON_FATAL);
1657 		return;
1658 	}
1659 
1660 	p->stats.msg_sent_rrefresh++;
1661 }
1662 
1663 int
1664 session_graceful_restart(struct peer *p)
1665 {
1666 	u_int8_t	i;
1667 
1668 	timer_set(p, Timer_RestartTimeout, p->capa.neg.grestart.timeout);
1669 
1670 	for (i = 0; i < AID_MAX; i++) {
1671 		if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) {
1672 			if (imsg_compose(ibuf_rde, IMSG_SESSION_STALE,
1673 			    p->conf.id, 0, -1, &i, sizeof(i)) == -1)
1674 				return (-1);
1675 			log_peer_warnx(&p->conf,
1676 			    "graceful restart of %s, keeping routes",
1677 			    aid2str(i));
1678 			p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING;
1679 		} else if (p->capa.neg.mp[i]) {
1680 			if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH,
1681 			    p->conf.id, 0, -1, &i, sizeof(i)) == -1)
1682 				return (-1);
1683 			log_peer_warnx(&p->conf,
1684 			    "graceful restart of %s, flushing routes",
1685 			    aid2str(i));
1686 		}
1687 	}
1688 	return (0);
1689 }
1690 
1691 int
1692 session_graceful_stop(struct peer *p)
1693 {
1694 	u_int8_t	i;
1695 
1696 	for (i = 0; i < AID_MAX; i++) {
1697 		/*
1698 		 * Only flush if the peer is restarting and the timeout fired.
1699 		 * In all other cases the session was already flushed when the
1700 		 * session went down or when the new open message was parsed.
1701 		 */
1702 		if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) {
1703 			log_peer_warnx(&p->conf, "graceful restart of %s, "
1704 			    "time-out, flushing", aid2str(i));
1705 			if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH,
1706 			    p->conf.id, 0, -1, &i, sizeof(i)) == -1)
1707 				return (-1);
1708 		}
1709 		p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING;
1710 	}
1711 	return (0);
1712 }
1713 
1714 int
1715 session_dispatch_msg(struct pollfd *pfd, struct peer *p)
1716 {
1717 	ssize_t		n;
1718 	socklen_t	len;
1719 	int		error;
1720 
1721 	if (p->state == STATE_CONNECT) {
1722 		if (pfd->revents & POLLOUT) {
1723 			if (pfd->revents & POLLIN) {
1724 				/* error occurred */
1725 				len = sizeof(error);
1726 				if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR,
1727 				    &error, &len) == -1 || error) {
1728 					if (error)
1729 						errno = error;
1730 					if (errno != p->lasterr) {
1731 						log_peer_warn(&p->conf,
1732 						    "socket error");
1733 						p->lasterr = errno;
1734 					}
1735 					bgp_fsm(p, EVNT_CON_OPENFAIL);
1736 					return (1);
1737 				}
1738 			}
1739 			bgp_fsm(p, EVNT_CON_OPEN);
1740 			return (1);
1741 		}
1742 		if (pfd->revents & POLLHUP) {
1743 			bgp_fsm(p, EVNT_CON_OPENFAIL);
1744 			return (1);
1745 		}
1746 		if (pfd->revents & (POLLERR|POLLNVAL)) {
1747 			bgp_fsm(p, EVNT_CON_FATAL);
1748 			return (1);
1749 		}
1750 		return (0);
1751 	}
1752 
1753 	if (pfd->revents & POLLHUP) {
1754 		bgp_fsm(p, EVNT_CON_CLOSED);
1755 		return (1);
1756 	}
1757 	if (pfd->revents & (POLLERR|POLLNVAL)) {
1758 		bgp_fsm(p, EVNT_CON_FATAL);
1759 		return (1);
1760 	}
1761 
1762 	if (pfd->revents & POLLOUT && p->wbuf.queued) {
1763 		if ((error = msgbuf_write(&p->wbuf)) <= 0 && errno != EAGAIN) {
1764 			if (error == 0)
1765 				log_peer_warnx(&p->conf, "Connection closed");
1766 			else if (error == -1)
1767 				log_peer_warn(&p->conf, "write error");
1768 			bgp_fsm(p, EVNT_CON_FATAL);
1769 			return (1);
1770 		}
1771 		if (!(pfd->revents & POLLIN))
1772 			return (1);
1773 	}
1774 
1775 	if (p->rbuf && pfd->revents & POLLIN) {
1776 		if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos,
1777 		    sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) {
1778 			if (errno != EINTR && errno != EAGAIN) {
1779 				log_peer_warn(&p->conf, "read error");
1780 				bgp_fsm(p, EVNT_CON_FATAL);
1781 			}
1782 			return (1);
1783 		}
1784 		if (n == 0) {	/* connection closed */
1785 			bgp_fsm(p, EVNT_CON_CLOSED);
1786 			return (1);
1787 		}
1788 
1789 		p->rbuf->wpos += n;
1790 		p->stats.last_read = time(NULL);
1791 		return (1);
1792 	}
1793 	return (0);
1794 }
1795 
1796 int
1797 session_process_msg(struct peer *p)
1798 {
1799 	ssize_t		rpos, av, left;
1800 	int		processed = 0;
1801 	u_int16_t	msglen;
1802 	u_int8_t	msgtype;
1803 
1804 	rpos = 0;
1805 	av = p->rbuf->wpos;
1806 
1807 	/*
1808 	 * session might drop to IDLE -> buffers deallocated
1809 	 * we MUST check rbuf != NULL before use
1810 	 */
1811 	for (;;) {
1812 		if (rpos + MSGSIZE_HEADER > av)
1813 			break;
1814 		if (p->rbuf == NULL)
1815 			break;
1816 		if (parse_header(p, p->rbuf->buf + rpos, &msglen,
1817 		    &msgtype) == -1)
1818 			return (0);
1819 		if (rpos + msglen > av)
1820 			break;
1821 		p->rbuf->rptr = p->rbuf->buf + rpos;
1822 
1823 		switch (msgtype) {
1824 		case OPEN:
1825 			bgp_fsm(p, EVNT_RCVD_OPEN);
1826 			p->stats.msg_rcvd_open++;
1827 			break;
1828 		case UPDATE:
1829 			bgp_fsm(p, EVNT_RCVD_UPDATE);
1830 			p->stats.msg_rcvd_update++;
1831 			break;
1832 		case NOTIFICATION:
1833 			bgp_fsm(p, EVNT_RCVD_NOTIFICATION);
1834 			p->stats.msg_rcvd_notification++;
1835 			break;
1836 		case KEEPALIVE:
1837 			bgp_fsm(p, EVNT_RCVD_KEEPALIVE);
1838 			p->stats.msg_rcvd_keepalive++;
1839 			break;
1840 		case RREFRESH:
1841 			parse_refresh(p);
1842 			p->stats.msg_rcvd_rrefresh++;
1843 			break;
1844 		default:	/* cannot happen */
1845 			session_notification(p, ERR_HEADER, ERR_HDR_TYPE,
1846 			    &msgtype, 1);
1847 			log_warnx("received message with unknown type %u",
1848 			    msgtype);
1849 			bgp_fsm(p, EVNT_CON_FATAL);
1850 		}
1851 		rpos += msglen;
1852 		if (++processed > MSG_PROCESS_LIMIT)
1853 			break;
1854 	}
1855 	if (p->rbuf == NULL)
1856 		return (1);
1857 
1858 	if (rpos < av) {
1859 		left = av - rpos;
1860 		memmove(&p->rbuf->buf, p->rbuf->buf + rpos, left);
1861 		p->rbuf->wpos = left;
1862 	} else
1863 		p->rbuf->wpos = 0;
1864 
1865 	return (1);
1866 }
1867 
1868 int
1869 parse_header(struct peer *peer, u_char *data, u_int16_t *len, u_int8_t *type)
1870 {
1871 	struct mrt		*mrt;
1872 	u_char			*p;
1873 	u_int16_t		 olen;
1874 	static const u_int8_t	 marker[MSGSIZE_HEADER_MARKER] = { 0xff, 0xff,
1875 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1876 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
1877 
1878 	/* caller MUST make sure we are getting 19 bytes! */
1879 	p = data;
1880 	if (memcmp(p, marker, sizeof(marker))) {
1881 		log_peer_warnx(&peer->conf, "sync error");
1882 		session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL, 0);
1883 		bgp_fsm(peer, EVNT_CON_FATAL);
1884 		return (-1);
1885 	}
1886 	p += MSGSIZE_HEADER_MARKER;
1887 
1888 	memcpy(&olen, p, 2);
1889 	*len = ntohs(olen);
1890 	p += 2;
1891 	memcpy(type, p, 1);
1892 
1893 	if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) {
1894 		log_peer_warnx(&peer->conf,
1895 		    "received message: illegal length: %u byte", *len);
1896 		session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1897 		    &olen, sizeof(olen));
1898 		bgp_fsm(peer, EVNT_CON_FATAL);
1899 		return (-1);
1900 	}
1901 
1902 	switch (*type) {
1903 	case OPEN:
1904 		if (*len < MSGSIZE_OPEN_MIN) {
1905 			log_peer_warnx(&peer->conf,
1906 			    "received OPEN: illegal len: %u byte", *len);
1907 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1908 			    &olen, sizeof(olen));
1909 			bgp_fsm(peer, EVNT_CON_FATAL);
1910 			return (-1);
1911 		}
1912 		break;
1913 	case NOTIFICATION:
1914 		if (*len < MSGSIZE_NOTIFICATION_MIN) {
1915 			log_peer_warnx(&peer->conf,
1916 			    "received NOTIFICATION: illegal len: %u byte",
1917 			    *len);
1918 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1919 			    &olen, sizeof(olen));
1920 			bgp_fsm(peer, EVNT_CON_FATAL);
1921 			return (-1);
1922 		}
1923 		break;
1924 	case UPDATE:
1925 		if (*len < MSGSIZE_UPDATE_MIN) {
1926 			log_peer_warnx(&peer->conf,
1927 			    "received UPDATE: illegal len: %u byte", *len);
1928 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1929 			    &olen, sizeof(olen));
1930 			bgp_fsm(peer, EVNT_CON_FATAL);
1931 			return (-1);
1932 		}
1933 		break;
1934 	case KEEPALIVE:
1935 		if (*len != MSGSIZE_KEEPALIVE) {
1936 			log_peer_warnx(&peer->conf,
1937 			    "received KEEPALIVE: illegal len: %u byte", *len);
1938 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1939 			    &olen, sizeof(olen));
1940 			bgp_fsm(peer, EVNT_CON_FATAL);
1941 			return (-1);
1942 		}
1943 		break;
1944 	case RREFRESH:
1945 		if (*len != MSGSIZE_RREFRESH) {
1946 			log_peer_warnx(&peer->conf,
1947 			    "received RREFRESH: illegal len: %u byte", *len);
1948 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1949 			    &olen, sizeof(olen));
1950 			bgp_fsm(peer, EVNT_CON_FATAL);
1951 			return (-1);
1952 		}
1953 		break;
1954 	default:
1955 		log_peer_warnx(&peer->conf,
1956 		    "received msg with unknown type %u", *type);
1957 		session_notification(peer, ERR_HEADER, ERR_HDR_TYPE,
1958 		    type, 1);
1959 		bgp_fsm(peer, EVNT_CON_FATAL);
1960 		return (-1);
1961 	}
1962 	LIST_FOREACH(mrt, &mrthead, entry) {
1963 		if (!(mrt->type == MRT_ALL_IN || (*type == UPDATE &&
1964 		    mrt->type == MRT_UPDATE_IN)))
1965 			continue;
1966 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1967 		    mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
1968 		    mrt->group_id == peer->conf.groupid))
1969 			mrt_dump_bgp_msg(mrt, data, *len, peer);
1970 	}
1971 	return (0);
1972 }
1973 
1974 int
1975 parse_open(struct peer *peer)
1976 {
1977 	u_char		*p, *op_val;
1978 	u_int8_t	 version, rversion;
1979 	u_int16_t	 short_as, msglen;
1980 	u_int16_t	 holdtime, oholdtime, myholdtime;
1981 	u_int32_t	 as, bgpid;
1982 	u_int8_t	 optparamlen, plen;
1983 	u_int8_t	 op_type, op_len;
1984 
1985 	p = peer->rbuf->rptr;
1986 	p += MSGSIZE_HEADER_MARKER;
1987 	memcpy(&msglen, p, sizeof(msglen));
1988 	msglen = ntohs(msglen);
1989 
1990 	p = peer->rbuf->rptr;
1991 	p += MSGSIZE_HEADER;	/* header is already checked */
1992 
1993 	memcpy(&version, p, sizeof(version));
1994 	p += sizeof(version);
1995 
1996 	if (version != BGP_VERSION) {
1997 		log_peer_warnx(&peer->conf,
1998 		    "peer wants unrecognized version %u", version);
1999 		if (version > BGP_VERSION)
2000 			rversion = version - BGP_VERSION;
2001 		else
2002 			rversion = BGP_VERSION;
2003 		session_notification(peer, ERR_OPEN, ERR_OPEN_VERSION,
2004 		    &rversion, sizeof(rversion));
2005 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2006 		return (-1);
2007 	}
2008 
2009 	memcpy(&short_as, p, sizeof(short_as));
2010 	p += sizeof(short_as);
2011 	as = peer->short_as = ntohs(short_as);
2012 
2013 	memcpy(&oholdtime, p, sizeof(oholdtime));
2014 	p += sizeof(oholdtime);
2015 
2016 	holdtime = ntohs(oholdtime);
2017 	if (holdtime && holdtime < peer->conf.min_holdtime) {
2018 		log_peer_warnx(&peer->conf,
2019 		    "peer requests unacceptable holdtime %u", holdtime);
2020 		session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME,
2021 		    NULL, 0);
2022 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2023 		return (-1);
2024 	}
2025 
2026 	myholdtime = peer->conf.holdtime;
2027 	if (!myholdtime)
2028 		myholdtime = conf->holdtime;
2029 	if (holdtime < myholdtime)
2030 		peer->holdtime = holdtime;
2031 	else
2032 		peer->holdtime = myholdtime;
2033 
2034 	memcpy(&bgpid, p, sizeof(bgpid));
2035 	p += sizeof(bgpid);
2036 
2037 	/* check bgpid for validity - just disallow 0 */
2038 	if (ntohl(bgpid) == 0) {
2039 		log_peer_warnx(&peer->conf, "peer BGPID %u unacceptable",
2040 		    ntohl(bgpid));
2041 		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID,
2042 		    NULL, 0);
2043 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2044 		return (-1);
2045 	}
2046 	peer->remote_bgpid = bgpid;
2047 
2048 	memcpy(&optparamlen, p, sizeof(optparamlen));
2049 	p += sizeof(optparamlen);
2050 
2051 	if (optparamlen != msglen - MSGSIZE_OPEN_MIN) {
2052 			log_peer_warnx(&peer->conf,
2053 			    "corrupt OPEN message received: length mismatch");
2054 			session_notification(peer, ERR_OPEN, 0, NULL, 0);
2055 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2056 			return (-1);
2057 	}
2058 
2059 	plen = optparamlen;
2060 	while (plen > 0) {
2061 		if (plen < 2) {
2062 			log_peer_warnx(&peer->conf,
2063 			    "corrupt OPEN message received, len wrong");
2064 			session_notification(peer, ERR_OPEN, 0, NULL, 0);
2065 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2066 			return (-1);
2067 		}
2068 		memcpy(&op_type, p, sizeof(op_type));
2069 		p += sizeof(op_type);
2070 		plen -= sizeof(op_type);
2071 		memcpy(&op_len, p, sizeof(op_len));
2072 		p += sizeof(op_len);
2073 		plen -= sizeof(op_len);
2074 		if (op_len > 0) {
2075 			if (plen < op_len) {
2076 				log_peer_warnx(&peer->conf,
2077 				    "corrupt OPEN message received, len wrong");
2078 				session_notification(peer, ERR_OPEN, 0,
2079 				    NULL, 0);
2080 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2081 				return (-1);
2082 			}
2083 			op_val = p;
2084 			p += op_len;
2085 			plen -= op_len;
2086 		} else
2087 			op_val = NULL;
2088 
2089 		switch (op_type) {
2090 		case OPT_PARAM_CAPABILITIES:		/* RFC 3392 */
2091 			if (parse_capabilities(peer, op_val, op_len,
2092 			    &as) == -1) {
2093 				session_notification(peer, ERR_OPEN, 0,
2094 				    NULL, 0);
2095 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2096 				return (-1);
2097 			}
2098 			break;
2099 		case OPT_PARAM_AUTH:			/* deprecated */
2100 		default:
2101 			/*
2102 			 * unsupported type
2103 			 * the RFCs tell us to leave the data section empty
2104 			 * and notify the peer with ERR_OPEN, ERR_OPEN_OPT.
2105 			 * How the peer should know _which_ optional parameter
2106 			 * we don't support is beyond me.
2107 			 */
2108 			log_peer_warnx(&peer->conf,
2109 			    "received OPEN message with unsupported optional "
2110 			    "parameter: type %u", op_type);
2111 			session_notification(peer, ERR_OPEN, ERR_OPEN_OPT,
2112 				NULL, 0);
2113 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2114 			timer_set(peer, Timer_IdleHold, 0);	/* no punish */
2115 			peer->IdleHoldTime /= 2;
2116 			return (-1);
2117 		}
2118 	}
2119 
2120 	/* if remote-as is zero and it's a cloned neighbor, accept any */
2121 	if (peer->template && !peer->conf.remote_as && as != AS_TRANS) {
2122 		peer->conf.remote_as = as;
2123 		peer->conf.ebgp = (peer->conf.remote_as != conf->as);
2124 		if (!peer->conf.ebgp)
2125 			/* force enforce_as off for iBGP sessions */
2126 			peer->conf.enforce_as = ENFORCE_AS_OFF;
2127 	}
2128 
2129 	if (peer->conf.remote_as != as) {
2130 		log_peer_warnx(&peer->conf, "peer sent wrong AS %s",
2131 		    log_as(as));
2132 		session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL, 0);
2133 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2134 		return (-1);
2135 	}
2136 
2137 	if (capa_neg_calc(peer) == -1) {
2138 		log_peer_warnx(&peer->conf,
2139 		    "capability negotiation calculation failed");
2140 		session_notification(peer, ERR_OPEN, 0, NULL, 0);
2141 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2142 		return (-1);
2143 	}
2144 
2145 	return (0);
2146 }
2147 
2148 int
2149 parse_update(struct peer *peer)
2150 {
2151 	u_char		*p;
2152 	u_int16_t	 datalen;
2153 
2154 	/*
2155 	 * we pass the message verbatim to the rde.
2156 	 * in case of errors the whole session is reset with a
2157 	 * notification anyway, we only need to know the peer
2158 	 */
2159 	p = peer->rbuf->rptr;
2160 	p += MSGSIZE_HEADER_MARKER;
2161 	memcpy(&datalen, p, sizeof(datalen));
2162 	datalen = ntohs(datalen);
2163 
2164 	p = peer->rbuf->rptr;
2165 	p += MSGSIZE_HEADER;	/* header is already checked */
2166 	datalen -= MSGSIZE_HEADER;
2167 
2168 	if (imsg_compose(ibuf_rde, IMSG_UPDATE, peer->conf.id, 0, -1, p,
2169 	    datalen) == -1)
2170 		return (-1);
2171 
2172 	return (0);
2173 }
2174 
2175 int
2176 parse_refresh(struct peer *peer)
2177 {
2178 	u_char		*p;
2179 	u_int16_t	 afi;
2180 	u_int8_t	 aid, safi;
2181 
2182 	p = peer->rbuf->rptr;
2183 	p += MSGSIZE_HEADER;	/* header is already checked */
2184 
2185 	/*
2186 	 * We could check if we actually announced the capability but
2187 	 * as long as the message is correctly encoded we don't care.
2188 	 */
2189 
2190 	/* afi, 2 byte */
2191 	memcpy(&afi, p, sizeof(afi));
2192 	afi = ntohs(afi);
2193 	p += 2;
2194 	/* reserved, 1 byte */
2195 	p += 1;
2196 	/* safi, 1 byte */
2197 	memcpy(&safi, p, sizeof(safi));
2198 
2199 	/* afi/safi unchecked -	unrecognized values will be ignored anyway */
2200 	if (afi2aid(afi, safi, &aid) == -1) {
2201 		log_peer_warnx(&peer->conf, "peer sent bad refresh, "
2202 		    "invalid afi/safi pair");
2203 		return (0);
2204 	}
2205 
2206 	if (imsg_compose(ibuf_rde, IMSG_REFRESH, peer->conf.id, 0, -1, &aid,
2207 	    sizeof(aid)) == -1)
2208 		return (-1);
2209 
2210 	return (0);
2211 }
2212 
2213 int
2214 parse_notification(struct peer *peer)
2215 {
2216 	u_char		*p;
2217 	u_int16_t	 datalen;
2218 	u_int8_t	 errcode;
2219 	u_int8_t	 subcode;
2220 	u_int8_t	 capa_code;
2221 	u_int8_t	 capa_len;
2222 	u_int8_t	 i;
2223 
2224 	/* just log */
2225 	p = peer->rbuf->rptr;
2226 	p += MSGSIZE_HEADER_MARKER;
2227 	memcpy(&datalen, p, sizeof(datalen));
2228 	datalen = ntohs(datalen);
2229 
2230 	p = peer->rbuf->rptr;
2231 	p += MSGSIZE_HEADER;	/* header is already checked */
2232 	datalen -= MSGSIZE_HEADER;
2233 
2234 	memcpy(&errcode, p, sizeof(errcode));
2235 	p += sizeof(errcode);
2236 	datalen -= sizeof(errcode);
2237 
2238 	memcpy(&subcode, p, sizeof(subcode));
2239 	p += sizeof(subcode);
2240 	datalen -= sizeof(subcode);
2241 
2242 	log_notification(peer, errcode, subcode, p, datalen, "received");
2243 	peer->errcnt++;
2244 
2245 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_CAPA) {
2246 		if (datalen == 0) {	/* zebra likes to send those.. humbug */
2247 			log_peer_warnx(&peer->conf, "received \"unsupported "
2248 			    "capability\" notification without data part, "
2249 			    "disabling capability announcements altogether");
2250 			session_capa_ann_none(peer);
2251 		}
2252 
2253 		while (datalen > 0) {
2254 			if (datalen < 2) {
2255 				log_peer_warnx(&peer->conf,
2256 				    "parse_notification: "
2257 				    "expect len >= 2, len is %u", datalen);
2258 				return (-1);
2259 			}
2260 			memcpy(&capa_code, p, sizeof(capa_code));
2261 			p += sizeof(capa_code);
2262 			datalen -= sizeof(capa_code);
2263 			memcpy(&capa_len, p, sizeof(capa_len));
2264 			p += sizeof(capa_len);
2265 			datalen -= sizeof(capa_len);
2266 			if (datalen < capa_len) {
2267 				log_peer_warnx(&peer->conf,
2268 				    "parse_notification: capa_len %u exceeds "
2269 				    "remaining msg length %u", capa_len,
2270 				    datalen);
2271 				return (-1);
2272 			}
2273 			p += capa_len;
2274 			datalen -= capa_len;
2275 			switch (capa_code) {
2276 			case CAPA_MP:
2277 				for (i = 0; i < AID_MAX; i++)
2278 					peer->capa.ann.mp[i] = 0;
2279 				log_peer_warnx(&peer->conf,
2280 				    "disabling multiprotocol capability");
2281 				break;
2282 			case CAPA_REFRESH:
2283 				peer->capa.ann.refresh = 0;
2284 				log_peer_warnx(&peer->conf,
2285 				    "disabling route refresh capability");
2286 				break;
2287 			case CAPA_RESTART:
2288 				peer->capa.ann.grestart.restart = 0;
2289 				log_peer_warnx(&peer->conf,
2290 				    "disabling restart capability");
2291 				break;
2292 			case CAPA_AS4BYTE:
2293 				peer->capa.ann.as4byte = 0;
2294 				log_peer_warnx(&peer->conf,
2295 				    "disabling 4-byte AS num capability");
2296 				break;
2297 			default:	/* should not happen... */
2298 				log_peer_warnx(&peer->conf, "received "
2299 				    "\"unsupported capability\" notification "
2300 				    "for unknown capability %u, disabling "
2301 				    "capability announcements altogether",
2302 				    capa_code);
2303 				session_capa_ann_none(peer);
2304 				break;
2305 			}
2306 		}
2307 
2308 		return (1);
2309 	}
2310 
2311 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_OPT) {
2312 		session_capa_ann_none(peer);
2313 		return (1);
2314 	}
2315 
2316 	return (0);
2317 }
2318 
2319 int
2320 parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as)
2321 {
2322 	u_char		*capa_val;
2323 	u_int32_t	 remote_as;
2324 	u_int16_t	 len;
2325 	u_int16_t	 afi;
2326 	u_int16_t	 gr_header;
2327 	u_int8_t	 safi;
2328 	u_int8_t	 aid;
2329 	u_int8_t	 gr_flags;
2330 	u_int8_t	 capa_code;
2331 	u_int8_t	 capa_len;
2332 	u_int8_t	 i;
2333 
2334 	len = dlen;
2335 	while (len > 0) {
2336 		if (len < 2) {
2337 			log_peer_warnx(&peer->conf, "Bad capabilities attr "
2338 			    "length: %u, too short", len);
2339 			return (-1);
2340 		}
2341 		memcpy(&capa_code, d, sizeof(capa_code));
2342 		d += sizeof(capa_code);
2343 		len -= sizeof(capa_code);
2344 		memcpy(&capa_len, d, sizeof(capa_len));
2345 		d += sizeof(capa_len);
2346 		len -= sizeof(capa_len);
2347 		if (capa_len > 0) {
2348 			if (len < capa_len) {
2349 				log_peer_warnx(&peer->conf,
2350 				    "Bad capabilities attr length: "
2351 				    "len %u smaller than capa_len %u",
2352 				    len, capa_len);
2353 				return (-1);
2354 			}
2355 			capa_val = d;
2356 			d += capa_len;
2357 			len -= capa_len;
2358 		} else
2359 			capa_val = NULL;
2360 
2361 		switch (capa_code) {
2362 		case CAPA_MP:			/* RFC 4760 */
2363 			if (capa_len != 4) {
2364 				log_peer_warnx(&peer->conf,
2365 				    "Bad multi protocol capability length: "
2366 				    "%u", capa_len);
2367 				break;
2368 			}
2369 			memcpy(&afi, capa_val, sizeof(afi));
2370 			afi = ntohs(afi);
2371 			memcpy(&safi, capa_val + 3, sizeof(safi));
2372 			if (afi2aid(afi, safi, &aid) == -1) {
2373 				log_peer_warnx(&peer->conf,
2374 				    "Received multi protocol capability: "
2375 				    " unknown AFI %u, safi %u pair",
2376 				    afi, safi);
2377 				break;
2378 			}
2379 			peer->capa.peer.mp[aid] = 1;
2380 			break;
2381 		case CAPA_REFRESH:
2382 			peer->capa.peer.refresh = 1;
2383 			break;
2384 		case CAPA_RESTART:
2385 			if (capa_len == 2) {
2386 				/* peer only supports EoR marker */
2387 				peer->capa.peer.grestart.restart = 1;
2388 				peer->capa.peer.grestart.timeout = 0;
2389 				break;
2390 			} else if (capa_len % 4 != 2) {
2391 				log_peer_warnx(&peer->conf,
2392 				    "Bad graceful restart capability length: "
2393 				    "%u", capa_len);
2394 				peer->capa.peer.grestart.restart = 0;
2395 				peer->capa.peer.grestart.timeout = 0;
2396 				break;
2397 			}
2398 
2399 			memcpy(&gr_header, capa_val, sizeof(gr_header));
2400 			gr_header = ntohs(gr_header);
2401 			peer->capa.peer.grestart.timeout =
2402 			    gr_header & CAPA_GR_TIMEMASK;
2403 			if (peer->capa.peer.grestart.timeout == 0) {
2404 				log_peer_warnx(&peer->conf, "Received "
2405 				    "graceful restart timeout is zero");
2406 				peer->capa.peer.grestart.restart = 0;
2407 				break;
2408 			}
2409 
2410 			for (i = 2; i <= capa_len - 4; i += 4) {
2411 				memcpy(&afi, capa_val + i, sizeof(afi));
2412 				afi = ntohs(afi);
2413 				memcpy(&safi, capa_val + i + 2, sizeof(safi));
2414 				if (afi2aid(afi, safi, &aid) == -1) {
2415 					log_peer_warnx(&peer->conf,
2416 					    "Received graceful restart capa: "
2417 					    " unknown AFI %u, safi %u pair",
2418 					    afi, safi);
2419 					continue;
2420 				}
2421 				memcpy(&gr_flags, capa_val + i + 3,
2422 				    sizeof(gr_flags));
2423 				peer->capa.peer.grestart.flags[aid] |=
2424 				    CAPA_GR_PRESENT;
2425 				if (gr_flags & CAPA_GR_F_FLAG)
2426 					peer->capa.peer.grestart.flags[aid] |=
2427 					    CAPA_GR_FORWARD;
2428 				if (gr_header & CAPA_GR_R_FLAG)
2429 					peer->capa.peer.grestart.flags[aid] |=
2430 					    CAPA_GR_RESTART;
2431 				peer->capa.peer.grestart.restart = 2;
2432 			}
2433 			break;
2434 		case CAPA_AS4BYTE:
2435 			if (capa_len != 4) {
2436 				log_peer_warnx(&peer->conf,
2437 				    "Bad AS4BYTE capability length: "
2438 				    "%u", capa_len);
2439 				peer->capa.peer.as4byte = 0;
2440 				break;
2441 			}
2442 			memcpy(&remote_as, capa_val, sizeof(remote_as));
2443 			*as = ntohl(remote_as);
2444 			peer->capa.peer.as4byte = 1;
2445 			break;
2446 		default:
2447 			break;
2448 		}
2449 	}
2450 
2451 	return (0);
2452 }
2453 
2454 int
2455 capa_neg_calc(struct peer *p)
2456 {
2457 	u_int8_t	i, hasmp = 0;
2458 
2459 	/* refresh: does not realy matter here, use peer setting */
2460 	p->capa.neg.refresh = p->capa.peer.refresh;
2461 
2462 	/* as4byte: both side must announce capability */
2463 	if (p->capa.ann.as4byte && p->capa.peer.as4byte)
2464 		p->capa.neg.as4byte = 1;
2465 	else
2466 		p->capa.neg.as4byte = 0;
2467 
2468 	/* MP: both side must announce capability */
2469 	for (i = 0; i < AID_MAX; i++) {
2470 		if (p->capa.ann.mp[i] && p->capa.peer.mp[i]) {
2471 			p->capa.neg.mp[i] = 1;
2472 			hasmp = 1;
2473 		} else
2474 			p->capa.neg.mp[i] = 0;
2475 	}
2476 	/* if no MP capability present default to IPv4 unicast mode */
2477 	if (!hasmp)
2478 		p->capa.neg.mp[AID_INET] = 1;
2479 
2480 	/*
2481 	 * graceful restart: only the peer capabilities are of interest here.
2482 	 * It is necessary to compare the new values with the previous ones
2483 	 * and act acordingly. AFI/SAFI that are not part in the MP capability
2484 	 * are treated as not being present.
2485 	 */
2486 
2487 	for (i = 0; i < AID_MAX; i++) {
2488 		int8_t	negflags;
2489 
2490 		/* disable GR if the AFI/SAFI is not present */
2491 		if (p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT &&
2492 		    p->capa.neg.mp[i] == 0)
2493 			p->capa.peer.grestart.flags[i] = 0;	/* disable */
2494 		/* look at current GR state and decide what to do */
2495 		negflags = p->capa.neg.grestart.flags[i];
2496 		p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i];
2497 		if (negflags & CAPA_GR_RESTARTING) {
2498 			if (!(p->capa.peer.grestart.flags[i] &
2499 			    CAPA_GR_FORWARD)) {
2500 				if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH,
2501 				    p->conf.id, 0, -1, &i, sizeof(i)) == -1)
2502 					return (-1);
2503 				log_peer_warnx(&p->conf, "graceful restart of "
2504 				    "%s, not restarted, flushing", aid2str(i));
2505 			} else
2506 				p->capa.neg.grestart.flags[i] |=
2507 				    CAPA_GR_RESTARTING;
2508 		}
2509 	}
2510 	p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout;
2511 	p->capa.neg.grestart.restart = p->capa.peer.grestart.restart;
2512 
2513 	return (0);
2514 }
2515 
2516 void
2517 session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt)
2518 {
2519 	struct imsg		 imsg;
2520 	struct mrt		 xmrt;
2521 	struct mrt		*mrt;
2522 	struct imsgbuf		*i;
2523 	struct peer_config	*pconf;
2524 	struct peer		*p, *next;
2525 	struct listen_addr	*la, *nla;
2526 	struct kif		*kif;
2527 	u_char			*data;
2528 	enum reconf_action	 reconf;
2529 	int			 n, fd, depend_ok, restricted;
2530 	u_int8_t		 aid, errcode, subcode;
2531 
2532 	while (ibuf) {
2533 		if ((n = imsg_get(ibuf, &imsg)) == -1)
2534 			fatal("session_dispatch_imsg: imsg_get error");
2535 
2536 		if (n == 0)
2537 			break;
2538 
2539 		switch (imsg.hdr.type) {
2540 		case IMSG_SOCKET_CONN:
2541 		case IMSG_SOCKET_CONN_CTL:
2542 			if (idx != PFD_PIPE_MAIN)
2543 				fatalx("reconf request not from parent");
2544 			if ((fd = imsg.fd) == -1) {
2545 				log_warnx("expected to receive imsg fd to "
2546 				    "RDE but didn't receive any");
2547 				break;
2548 			}
2549 			if ((i = malloc(sizeof(struct imsgbuf))) == NULL)
2550 				fatal(NULL);
2551 			imsg_init(i, fd);
2552 			if (imsg.hdr.type == IMSG_SOCKET_CONN) {
2553 				if (ibuf_rde) {
2554 					log_warnx("Unexpected imsg connection "
2555 					    "to RDE received");
2556 					msgbuf_clear(&ibuf_rde->w);
2557 					free(ibuf_rde);
2558 				}
2559 				ibuf_rde = i;
2560 			} else {
2561 				if (ibuf_rde_ctl) {
2562 					log_warnx("Unexpected imsg ctl "
2563 					    "connection to RDE received");
2564 					msgbuf_clear(&ibuf_rde_ctl->w);
2565 					free(ibuf_rde_ctl);
2566 				}
2567 				ibuf_rde_ctl = i;
2568 			}
2569 			break;
2570 		case IMSG_RECONF_CONF:
2571 			if (idx != PFD_PIPE_MAIN)
2572 				fatalx("reconf request not from parent");
2573 			if ((nconf = malloc(sizeof(struct bgpd_config))) ==
2574 			    NULL)
2575 				fatal(NULL);
2576 			memcpy(nconf, imsg.data, sizeof(struct bgpd_config));
2577 			if ((nconf->listen_addrs = calloc(1,
2578 			    sizeof(struct listen_addrs))) == NULL)
2579 				fatal(NULL);
2580 			TAILQ_INIT(nconf->listen_addrs);
2581 			npeers = NULL;
2582 			init_conf(nconf);
2583 			pending_reconf = 1;
2584 			break;
2585 		case IMSG_RECONF_PEER:
2586 			if (idx != PFD_PIPE_MAIN)
2587 				fatalx("reconf request not from parent");
2588 			pconf = imsg.data;
2589 			p = getpeerbyaddr(&pconf->remote_addr);
2590 			if (p == NULL) {
2591 				if ((p = calloc(1, sizeof(struct peer))) ==
2592 				    NULL)
2593 					fatal("new_peer");
2594 				p->state = p->prev_state = STATE_NONE;
2595 				p->next = npeers;
2596 				npeers = p;
2597 				reconf = RECONF_REINIT;
2598 			} else
2599 				reconf = RECONF_KEEP;
2600 
2601 			memcpy(&p->conf, pconf, sizeof(struct peer_config));
2602 			p->conf.reconf_action = reconf;
2603 
2604 			/* sync the RDE in case we keep the peer */
2605 			if (reconf == RECONF_KEEP) {
2606 				if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD,
2607 				    p->conf.id, 0, -1, &p->conf,
2608 				    sizeof(struct peer_config)) == -1)
2609 					fatalx("imsg_compose error");
2610 				if (p->conf.template) {
2611 					/* apply the conf to all clones */
2612 					struct peer *np;
2613 					for (np = peers; np; np = np->next) {
2614 						if (np->template != p)
2615 							continue;
2616 						session_template_clone(np,
2617 						    NULL, np->conf.id,
2618 						    np->conf.remote_as);
2619 						if (imsg_compose(ibuf_rde,
2620 						    IMSG_SESSION_ADD,
2621 						    np->conf.id, 0, -1,
2622 						    &np->conf,
2623 						    sizeof(struct peer_config))
2624 						    == -1)
2625 							fatalx("imsg_compose error");
2626 					}
2627 				}
2628 			}
2629 			break;
2630 		case IMSG_RECONF_LISTENER:
2631 			if (idx != PFD_PIPE_MAIN)
2632 				fatalx("reconf request not from parent");
2633 			if (nconf == NULL)
2634 				fatalx("IMSG_RECONF_LISTENER but no config");
2635 			nla = imsg.data;
2636 			TAILQ_FOREACH(la, conf->listen_addrs, entry)
2637 				if (!la_cmp(la, nla))
2638 					break;
2639 
2640 			if (la == NULL) {
2641 				if (nla->reconf != RECONF_REINIT)
2642 					fatalx("king bula sez: "
2643 					    "expected REINIT");
2644 
2645 				if ((nla->fd = imsg.fd) == -1)
2646 					log_warnx("expected to receive fd for "
2647 					    "%s but didn't receive any",
2648 					    log_sockaddr((struct sockaddr *)
2649 					    &nla->sa));
2650 
2651 				la = calloc(1, sizeof(struct listen_addr));
2652 				if (la == NULL)
2653 					fatal(NULL);
2654 				memcpy(&la->sa, &nla->sa, sizeof(la->sa));
2655 				la->flags = nla->flags;
2656 				la->fd = nla->fd;
2657 				la->reconf = RECONF_REINIT;
2658 				TAILQ_INSERT_TAIL(nconf->listen_addrs, la,
2659 				    entry);
2660 			} else {
2661 				if (nla->reconf != RECONF_KEEP)
2662 					fatalx("king bula sez: expected KEEP");
2663 				la->reconf = RECONF_KEEP;
2664 			}
2665 
2666 			break;
2667 		case IMSG_RECONF_CTRL:
2668 			if (idx != PFD_PIPE_MAIN)
2669 				fatalx("reconf request not from parent");
2670 			if (imsg.hdr.len != IMSG_HEADER_SIZE +
2671 			    sizeof(restricted))
2672 				fatalx("IFINFO imsg with wrong len");
2673 			memcpy(&restricted, imsg.data, sizeof(restricted));
2674 			if (imsg.fd == -1) {
2675 				log_warnx("expected to receive fd for control "
2676 				    "socket but didn't receive any");
2677 				break;
2678 			}
2679 			if (restricted) {
2680 				control_shutdown(rcsock);
2681 				rcsock = imsg.fd;
2682 			} else {
2683 				control_shutdown(csock);
2684 				csock = imsg.fd;
2685 			}
2686 			break;
2687 		case IMSG_RECONF_DONE:
2688 			if (idx != PFD_PIPE_MAIN)
2689 				fatalx("reconf request not from parent");
2690 			if (nconf == NULL)
2691 				fatalx("got IMSG_RECONF_DONE but no config");
2692 			conf->flags = nconf->flags;
2693 			conf->log = nconf->log;
2694 			conf->bgpid = nconf->bgpid;
2695 			conf->clusterid = nconf->clusterid;
2696 			conf->as = nconf->as;
2697 			conf->short_as = nconf->short_as;
2698 			conf->holdtime = nconf->holdtime;
2699 			conf->min_holdtime = nconf->min_holdtime;
2700 			conf->connectretry = nconf->connectretry;
2701 
2702 			/* add new peers */
2703 			for (p = npeers; p != NULL; p = next) {
2704 				next = p->next;
2705 				p->next = peers;
2706 				peers = p;
2707 			}
2708 			/* find ones that need attention */
2709 			for (p = peers; p != NULL; p = p->next) {
2710 				/* needs to be deleted? */
2711 				if (p->conf.reconf_action == RECONF_NONE &&
2712 				    !p->template)
2713 					p->conf.reconf_action = RECONF_DELETE;
2714 				/* had demotion, is demoted, demote removed? */
2715 				if (p->demoted && !p->conf.demote_group[0])
2716 						session_demote(p, -1);
2717 			}
2718 
2719 			/* delete old listeners */
2720 			for (la = TAILQ_FIRST(conf->listen_addrs); la != NULL;
2721 			    la = nla) {
2722 				nla = TAILQ_NEXT(la, entry);
2723 				if (la->reconf == RECONF_NONE) {
2724 					log_info("not listening on %s any more",
2725 					    log_sockaddr(
2726 					    (struct sockaddr *)&la->sa));
2727 					TAILQ_REMOVE(conf->listen_addrs, la,
2728 					    entry);
2729 					close(la->fd);
2730 					free(la);
2731 				}
2732 			}
2733 
2734 			/* add new listeners */
2735 			while ((la = TAILQ_FIRST(nconf->listen_addrs)) !=
2736 			    NULL) {
2737 				TAILQ_REMOVE(nconf->listen_addrs, la, entry);
2738 				TAILQ_INSERT_TAIL(conf->listen_addrs, la,
2739 				    entry);
2740 			}
2741 
2742 			setup_listeners(listener_cnt);
2743 			free(nconf->listen_addrs);
2744 			free(nconf);
2745 			nconf = NULL;
2746 			pending_reconf = 0;
2747 			log_info("SE reconfigured");
2748 			imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0,
2749 			    -1, NULL, 0);
2750 			break;
2751 		case IMSG_IFINFO:
2752 			if (idx != PFD_PIPE_MAIN)
2753 				fatalx("IFINFO message not from parent");
2754 			if (imsg.hdr.len != IMSG_HEADER_SIZE +
2755 			    sizeof(struct kif))
2756 				fatalx("IFINFO imsg with wrong len");
2757 			kif = imsg.data;
2758 			depend_ok = (kif->flags & IFF_UP) &&
2759 			    LINK_STATE_IS_UP(kif->link_state);
2760 
2761 			for (p = peers; p != NULL; p = p->next)
2762 				if (!strcmp(p->conf.if_depend, kif->ifname)) {
2763 					if (depend_ok && !p->depend_ok) {
2764 						p->depend_ok = depend_ok;
2765 						bgp_fsm(p, EVNT_START);
2766 					} else if (!depend_ok && p->depend_ok) {
2767 						p->depend_ok = depend_ok;
2768 						session_stop(p,
2769 						    ERR_CEASE_OTHER_CHANGE);
2770 					}
2771 				}
2772 			break;
2773 		case IMSG_MRT_OPEN:
2774 		case IMSG_MRT_REOPEN:
2775 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2776 			    sizeof(struct mrt)) {
2777 				log_warnx("wrong imsg len");
2778 				break;
2779 			}
2780 
2781 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
2782 			if ((xmrt.wbuf.fd = imsg.fd) == -1)
2783 				log_warnx("expected to receive fd for mrt dump "
2784 				    "but didn't receive any");
2785 
2786 			mrt = mrt_get(&mrthead, &xmrt);
2787 			if (mrt == NULL) {
2788 				/* new dump */
2789 				mrt = calloc(1, sizeof(struct mrt));
2790 				if (mrt == NULL)
2791 					fatal("session_dispatch_imsg");
2792 				memcpy(mrt, &xmrt, sizeof(struct mrt));
2793 				TAILQ_INIT(&mrt->wbuf.bufs);
2794 				LIST_INSERT_HEAD(&mrthead, mrt, entry);
2795 			} else {
2796 				/* old dump reopened */
2797 				close(mrt->wbuf.fd);
2798 				mrt->wbuf.fd = xmrt.wbuf.fd;
2799 			}
2800 			break;
2801 		case IMSG_MRT_CLOSE:
2802 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2803 			    sizeof(struct mrt)) {
2804 				log_warnx("wrong imsg len");
2805 				break;
2806 			}
2807 
2808 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
2809 			mrt = mrt_get(&mrthead, &xmrt);
2810 			if (mrt != NULL)
2811 				mrt_done(mrt);
2812 			break;
2813 		case IMSG_CTL_KROUTE:
2814 		case IMSG_CTL_KROUTE_ADDR:
2815 		case IMSG_CTL_SHOW_NEXTHOP:
2816 		case IMSG_CTL_SHOW_INTERFACE:
2817 		case IMSG_CTL_SHOW_FIB_TABLES:
2818 			if (idx != PFD_PIPE_MAIN)
2819 				fatalx("ctl kroute request not from parent");
2820 			control_imsg_relay(&imsg);
2821 			break;
2822 		case IMSG_CTL_SHOW_RIB:
2823 		case IMSG_CTL_SHOW_RIB_PREFIX:
2824 		case IMSG_CTL_SHOW_RIB_ATTR:
2825 		case IMSG_CTL_SHOW_RIB_MEM:
2826 		case IMSG_CTL_SHOW_NETWORK:
2827 		case IMSG_CTL_SHOW_NEIGHBOR:
2828 			if (idx != PFD_PIPE_ROUTE_CTL)
2829 				fatalx("ctl rib request not from RDE");
2830 			control_imsg_relay(&imsg);
2831 			break;
2832 		case IMSG_CTL_END:
2833 		case IMSG_CTL_RESULT:
2834 			control_imsg_relay(&imsg);
2835 			break;
2836 		case IMSG_UPDATE:
2837 			if (idx != PFD_PIPE_ROUTE)
2838 				fatalx("update request not from RDE");
2839 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2840 			    MAX_PKTSIZE - MSGSIZE_HEADER ||
2841 			    imsg.hdr.len < IMSG_HEADER_SIZE +
2842 			    MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER)
2843 				log_warnx("RDE sent invalid update");
2844 			else
2845 				session_update(imsg.hdr.peerid, imsg.data,
2846 				    imsg.hdr.len - IMSG_HEADER_SIZE);
2847 			break;
2848 		case IMSG_UPDATE_ERR:
2849 			if (idx != PFD_PIPE_ROUTE)
2850 				fatalx("update request not from RDE");
2851 			if (imsg.hdr.len < IMSG_HEADER_SIZE + 2) {
2852 				log_warnx("RDE sent invalid notification");
2853 				break;
2854 			}
2855 			if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) {
2856 				log_warnx("no such peer: id=%u",
2857 				    imsg.hdr.peerid);
2858 				break;
2859 			}
2860 			data = imsg.data;
2861 			errcode = *data++;
2862 			subcode = *data++;
2863 
2864 			if (imsg.hdr.len == IMSG_HEADER_SIZE + 2)
2865 				data = NULL;
2866 
2867 			session_notification(p, errcode, subcode,
2868 			    data, imsg.hdr.len - IMSG_HEADER_SIZE - 2);
2869 			switch (errcode) {
2870 			case ERR_CEASE:
2871 				switch (subcode) {
2872 				case ERR_CEASE_MAX_PREFIX:
2873 					bgp_fsm(p, EVNT_STOP);
2874 					if (p->conf.max_prefix_restart)
2875 						timer_set(p, Timer_IdleHold, 60 *
2876 						    p->conf.max_prefix_restart);
2877 					break;
2878 				default:
2879 					bgp_fsm(p, EVNT_CON_FATAL);
2880 					break;
2881 				}
2882 				break;
2883 			default:
2884 				bgp_fsm(p, EVNT_CON_FATAL);
2885 				break;
2886 			}
2887 			break;
2888 		case IMSG_SESSION_RESTARTED:
2889 			if (idx != PFD_PIPE_ROUTE)
2890 				fatalx("update request not from RDE");
2891 			if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(aid)) {
2892 				log_warnx("RDE sent invalid restart msg");
2893 				break;
2894 			}
2895 			if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) {
2896 				log_warnx("no such peer: id=%u",
2897 				    imsg.hdr.peerid);
2898 				break;
2899 			}
2900 			memcpy(&aid, imsg.data, sizeof(aid));
2901 			if (aid >= AID_MAX)
2902 				fatalx("IMSG_SESSION_RESTARTED: bad AID");
2903 			if (p->capa.neg.grestart.flags[aid] &
2904 			    CAPA_GR_RESTARTING) {
2905 				log_peer_warnx(&p->conf,
2906 				    "graceful restart of %s finished",
2907 				    aid2str(aid));
2908 				p->capa.neg.grestart.flags[aid] &=
2909 				    ~CAPA_GR_RESTARTING;
2910 				timer_stop(p, Timer_RestartTimeout);
2911 
2912 				/* signal back to RDE to cleanup stale routes */
2913 				if (imsg_compose(ibuf_rde,
2914 				    IMSG_SESSION_RESTARTED, imsg.hdr.peerid, 0,
2915 				    -1, &aid, sizeof(aid)) == -1)
2916 					fatal("imsg_compose: "
2917 					    "IMSG_SESSION_RESTARTED");
2918 			}
2919 			break;
2920 		case IMSG_SESSION_DOWN:
2921 			if (idx != PFD_PIPE_ROUTE)
2922 				fatalx("update request not from RDE");
2923 			if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) {
2924 				log_warnx("no such peer: id=%u",
2925 				    imsg.hdr.peerid);
2926 				break;
2927 			}
2928 			session_stop(p, ERR_CEASE_ADMIN_DOWN);
2929 			break;
2930 		default:
2931 			break;
2932 		}
2933 		imsg_free(&imsg);
2934 	}
2935 }
2936 
2937 int
2938 la_cmp(struct listen_addr *a, struct listen_addr *b)
2939 {
2940 	struct sockaddr_in	*in_a, *in_b;
2941 	struct sockaddr_in6	*in6_a, *in6_b;
2942 
2943 	if (a->sa.ss_family != b->sa.ss_family)
2944 		return (1);
2945 
2946 	switch (a->sa.ss_family) {
2947 	case AF_INET:
2948 		in_a = (struct sockaddr_in *)&a->sa;
2949 		in_b = (struct sockaddr_in *)&b->sa;
2950 		if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr)
2951 			return (1);
2952 		if (in_a->sin_port != in_b->sin_port)
2953 			return (1);
2954 		break;
2955 	case AF_INET6:
2956 		in6_a = (struct sockaddr_in6 *)&a->sa;
2957 		in6_b = (struct sockaddr_in6 *)&b->sa;
2958 		if (bcmp(&in6_a->sin6_addr, &in6_b->sin6_addr,
2959 		    sizeof(struct in6_addr)))
2960 			return (1);
2961 		if (in6_a->sin6_port != in6_b->sin6_port)
2962 			return (1);
2963 		break;
2964 	default:
2965 		fatal("king bula sez: unknown address family");
2966 		/* NOTREACHED */
2967 	}
2968 
2969 	return (0);
2970 }
2971 
2972 struct peer *
2973 getpeerbyaddr(struct bgpd_addr *addr)
2974 {
2975 	struct peer *p;
2976 
2977 	/* we might want a more effective way to find peers by IP */
2978 	for (p = peers; p != NULL &&
2979 	    memcmp(&p->conf.remote_addr, addr, sizeof(p->conf.remote_addr));
2980 	    p = p->next)
2981 		;	/* nothing */
2982 
2983 	return (p);
2984 }
2985 
2986 struct peer *
2987 getpeerbydesc(const char *descr)
2988 {
2989 	struct peer	*p, *res = NULL;
2990 	int		 match = 0;
2991 
2992 	for (p = peers; p != NULL; p = p->next)
2993 		if (!strcmp(p->conf.descr, descr)) {
2994 			res = p;
2995 			match++;
2996 		}
2997 
2998 	if (match > 1)
2999 		log_info("neighbor description \"%s\" not unique, request "
3000 		    "aborted", descr);
3001 
3002 	if (match == 1)
3003 		return (res);
3004 	else
3005 		return (NULL);
3006 }
3007 
3008 struct peer *
3009 getpeerbyip(struct sockaddr *ip)
3010 {
3011 	struct bgpd_addr addr;
3012 	struct peer	*p, *newpeer, *loose = NULL;
3013 	u_int32_t	 id;
3014 
3015 	sa2addr(ip, &addr);
3016 
3017 	/* we might want a more effective way to find peers by IP */
3018 	for (p = peers; p != NULL; p = p->next)
3019 		if (!p->conf.template &&
3020 		    !memcmp(&addr, &p->conf.remote_addr, sizeof(addr)))
3021 			return (p);
3022 
3023 	/* try template matching */
3024 	for (p = peers; p != NULL; p = p->next)
3025 		if (p->conf.template &&
3026 		    p->conf.remote_addr.aid == addr.aid &&
3027 		    session_match_mask(p, &addr))
3028 			if (loose == NULL || loose->conf.remote_masklen <
3029 			    p->conf.remote_masklen)
3030 				loose = p;
3031 
3032 	if (loose != NULL) {
3033 		/* clone */
3034 		if ((newpeer = malloc(sizeof(struct peer))) == NULL)
3035 			fatal(NULL);
3036 		memcpy(newpeer, loose, sizeof(struct peer));
3037 		for (id = UINT_MAX; id > UINT_MAX / 2; id--) {
3038 			for (p = peers; p != NULL && p->conf.id != id;
3039 			    p = p->next)
3040 				;	/* nothing */
3041 			if (p == NULL) {	/* we found a free id */
3042 				break;
3043 			}
3044 		}
3045 		newpeer->template = loose;
3046 		session_template_clone(newpeer, ip, id, 0);
3047 		newpeer->state = newpeer->prev_state = STATE_NONE;
3048 		newpeer->conf.reconf_action = RECONF_KEEP;
3049 		newpeer->rbuf = NULL;
3050 		init_peer(newpeer);
3051 		bgp_fsm(newpeer, EVNT_START);
3052 		newpeer->next = peers;
3053 		peers = newpeer;
3054 		return (newpeer);
3055 	}
3056 
3057 	return (NULL);
3058 }
3059 
3060 void
3061 session_template_clone(struct peer *p, struct sockaddr *ip, u_int32_t id,
3062     u_int32_t as)
3063 {
3064 	struct bgpd_addr	remote_addr;
3065 
3066 	if (ip)
3067 		sa2addr(ip, &remote_addr);
3068 	else
3069 		memcpy(&remote_addr, &p->conf.remote_addr, sizeof(remote_addr));
3070 
3071 	memcpy(&p->conf, &p->template->conf, sizeof(struct peer_config));
3072 
3073 	p->conf.id = id;
3074 
3075 	if (as) {
3076 		p->conf.remote_as = as;
3077 		p->conf.ebgp = (p->conf.remote_as != conf->as);
3078 		if (!p->conf.ebgp)
3079 			/* force enforce_as off for iBGP sessions */
3080 			p->conf.enforce_as = ENFORCE_AS_OFF;
3081 	}
3082 
3083 	memcpy(&p->conf.remote_addr, &remote_addr, sizeof(remote_addr));
3084 	switch (p->conf.remote_addr.aid) {
3085 	case AID_INET:
3086 		p->conf.remote_masklen = 32;
3087 		break;
3088 	case AID_INET6:
3089 		p->conf.remote_masklen = 128;
3090 		break;
3091 	}
3092 	p->conf.template = 0;
3093 }
3094 
3095 int
3096 session_match_mask(struct peer *p, struct bgpd_addr *a)
3097 {
3098 	in_addr_t	 v4mask;
3099 	struct in6_addr	 masked;
3100 
3101 	switch (p->conf.remote_addr.aid) {
3102 	case AID_INET:
3103 		v4mask = htonl(prefixlen2mask(p->conf.remote_masklen));
3104 		if (p->conf.remote_addr.v4.s_addr == (a->v4.s_addr & v4mask))
3105 			return (1);
3106 		return (0);
3107 	case AID_INET6:
3108 		inet6applymask(&masked, &a->v6, p->conf.remote_masklen);
3109 
3110 		if (!memcmp(&masked, &p->conf.remote_addr.v6, sizeof(masked)))
3111 			return (1);
3112 		return (0);
3113 	}
3114 	return (0);
3115 }
3116 
3117 struct peer *
3118 getpeerbyid(u_int32_t peerid)
3119 {
3120 	struct peer *p;
3121 
3122 	/* we might want a more effective way to find peers by IP */
3123 	for (p = peers; p != NULL &&
3124 	    p->conf.id != peerid; p = p->next)
3125 		;	/* nothing */
3126 
3127 	return (p);
3128 }
3129 
3130 void
3131 session_down(struct peer *peer)
3132 {
3133 	bzero(&peer->capa.neg, sizeof(peer->capa.neg));
3134 	peer->stats.last_updown = time(NULL);
3135 	if (imsg_compose(ibuf_rde, IMSG_SESSION_DOWN, peer->conf.id, 0, -1,
3136 	    NULL, 0) == -1)
3137 		fatalx("imsg_compose error");
3138 }
3139 
3140 void
3141 session_up(struct peer *p)
3142 {
3143 	struct session_up	 sup;
3144 
3145 	if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, p->conf.id, 0, -1,
3146 	    &p->conf, sizeof(p->conf)) == -1)
3147 		fatalx("imsg_compose error");
3148 
3149 	sa2addr((struct sockaddr *)&p->sa_local, &sup.local_addr);
3150 	sa2addr((struct sockaddr *)&p->sa_remote, &sup.remote_addr);
3151 
3152 	sup.remote_bgpid = p->remote_bgpid;
3153 	sup.short_as = p->short_as;
3154 	memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa));
3155 	p->stats.last_updown = time(NULL);
3156 	if (imsg_compose(ibuf_rde, IMSG_SESSION_UP, p->conf.id, 0, -1,
3157 	    &sup, sizeof(sup)) == -1)
3158 		fatalx("imsg_compose error");
3159 }
3160 
3161 int
3162 imsg_ctl_parent(int type, u_int32_t peerid, pid_t pid, void *data,
3163     u_int16_t datalen)
3164 {
3165 	return (imsg_compose(ibuf_main, type, peerid, pid, -1, data, datalen));
3166 }
3167 
3168 int
3169 imsg_ctl_rde(int type, pid_t pid, void *data, u_int16_t datalen)
3170 {
3171 	/*
3172 	 * Use control socket to talk to RDE to bypass the queue of the
3173 	 * regular imsg socket.
3174 	 */
3175 	return (imsg_compose(ibuf_rde_ctl, type, 0, pid, -1, data, datalen));
3176 }
3177 
3178 void
3179 session_demote(struct peer *p, int level)
3180 {
3181 	struct demote_msg	msg;
3182 
3183 	strlcpy(msg.demote_group, p->conf.demote_group,
3184 	    sizeof(msg.demote_group));
3185 	msg.level = level;
3186 	if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1,
3187 	    &msg, sizeof(msg)) == -1)
3188 		fatalx("imsg_compose error");
3189 
3190 	p->demoted += level;
3191 }
3192 
3193 void
3194 session_stop(struct peer *peer, u_int8_t subcode)
3195 {
3196 	switch (peer->state) {
3197 	case STATE_OPENSENT:
3198 	case STATE_OPENCONFIRM:
3199 	case STATE_ESTABLISHED:
3200 		session_notification(peer, ERR_CEASE, subcode, NULL, 0);
3201 		break;
3202 	default:
3203 		/* session not open, no need to send notification */
3204 		break;
3205 	}
3206 	bgp_fsm(peer, EVNT_STOP);
3207 }
3208