xref: /openbsd-src/usr.sbin/bgpd/session.c (revision 4c1e55dc91edd6e69ccc60ce855900fbc12cf34f)
1 /*	$OpenBSD: session.c,v 1.323 2012/07/11 09:43:10 sthen Exp $ */
2 
3 /*
4  * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/types.h>
21 
22 #include <sys/mman.h>
23 #include <sys/socket.h>
24 #include <sys/time.h>
25 #include <sys/resource.h>
26 #include <sys/un.h>
27 #include <net/if_types.h>
28 #include <netinet/in.h>
29 #include <netinet/in_systm.h>
30 #include <netinet/ip.h>
31 #include <netinet/tcp.h>
32 #include <arpa/inet.h>
33 
34 #include <err.h>
35 #include <errno.h>
36 #include <fcntl.h>
37 #include <poll.h>
38 #include <pwd.h>
39 #include <signal.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <unistd.h>
44 
45 #include "bgpd.h"
46 #include "mrt.h"
47 #include "session.h"
48 
/*
 * Fixed slots in the pollfd array built by session_main().  The pipes,
 * control sockets and the pfkey socket always occupy these indexes;
 * listener sockets start at PFD_LISTENERS_START and are followed by the
 * peer, mrt and control-connection descriptors.
 */
49 #define PFD_PIPE_MAIN		0
50 #define PFD_PIPE_ROUTE		1
51 #define PFD_PIPE_ROUTE_CTL	2
52 #define PFD_SOCK_CTL		3
53 #define PFD_SOCK_RCTL		4
54 #define PFD_SOCK_PFKEY		5
55 #define PFD_LISTENERS_START	6
56 
/* forward declarations for the functions used within this file */
57 void	session_sighdlr(int);
58 int	setup_listeners(u_int *);
59 void	init_conf(struct bgpd_config *);
60 void	init_peer(struct peer *);
61 void	start_timer_holdtime(struct peer *);
62 void	start_timer_keepalive(struct peer *);
63 void	session_close_connection(struct peer *);
64 void	change_state(struct peer *, enum session_state, enum session_events);
65 int	session_setup_socket(struct peer *);
66 void	session_accept(int);
67 int	session_connect(struct peer *);
68 void	session_tcp_established(struct peer *);
69 void	session_capa_ann_none(struct peer *);
70 int	session_capa_add(struct ibuf *, u_int8_t, u_int8_t);
71 int	session_capa_add_mp(struct ibuf *, u_int8_t);
72 struct bgp_msg	*session_newmsg(enum msg_type, u_int16_t);
73 int	session_sendmsg(struct bgp_msg *, struct peer *);
74 void	session_open(struct peer *);
75 void	session_keepalive(struct peer *);
76 void	session_update(u_int32_t, void *, size_t);
77 void	session_notification(struct peer *, u_int8_t, u_int8_t, void *,
78 	    ssize_t);
79 void	session_rrefresh(struct peer *, u_int8_t);
80 int	session_dispatch_msg(struct pollfd *, struct peer *);
81 int	session_process_msg(struct peer *);
82 int	parse_header(struct peer *, u_char *, u_int16_t *, u_int8_t *);
83 int	parse_open(struct peer *);
84 int	parse_update(struct peer *);
85 int	parse_refresh(struct peer *);
86 int	parse_notification(struct peer *);
87 int	parse_capabilities(struct peer *, u_char *, u_int16_t, u_int32_t *);
88 int	capa_neg_calc(struct peer *);
89 void	session_dispatch_imsg(struct imsgbuf *, int, u_int *);
90 void	session_up(struct peer *);
91 void	session_down(struct peer *);
92 void	session_demote(struct peer *, int);
93 
/* lookup and comparison helpers */
94 int		 la_cmp(struct listen_addr *, struct listen_addr *);
95 struct peer	*getpeerbyip(struct sockaddr *);
96 int		 session_match_mask(struct peer *, struct bgpd_addr *);
97 struct peer	*getpeerbyid(u_int32_t);
98 
/*
 * conf is the configuration the session engine runs with and peers is the
 * singly linked list of known peers.
 * NOTE(review): nconf/npeers look like staging copies used while a reload
 * is in progress -- confirm against session_dispatch_imsg().
 */
99 struct bgpd_config	*conf, *nconf;
100 struct bgpd_sysdep	 sysdep;
101 struct peer		*peers, *npeers;
/* set by session_sighdlr(); ends the main loop in session_main() */
102 volatile sig_atomic_t	 session_quit;
/* while non-zero, the main loop does not init, reinit or delete peers */
103 int			 pending_reconf;
/* control sockets; rcsock is the one accepted with the "restricted" flag */
104 int			 csock = -1, rcsock = -1;
/* number of initialized peers; sizes the peer_l and pollfd arrays */
105 u_int			 peer_cnt;
/* imsg channels set up in session_main() on the pipes it is handed */
106 struct imsgbuf		*ibuf_rde;
107 struct imsgbuf		*ibuf_rde_ctl;
108 struct imsgbuf		*ibuf_main;
109 
110 struct mrt_head		 mrthead;
/*
 * non-zero while accepting is throttled after running out of descriptors;
 * holds the getmonotime() stamp of when that happened
 */
111 time_t			 pauseaccept;
112 
113 void
114 session_sighdlr(int sig)
115 {
116 	switch (sig) {
117 	case SIGINT:
118 	case SIGTERM:
119 		session_quit = 1;
120 		break;
121 	}
122 }
123 
124 int
125 setup_listeners(u_int *la_cnt)
126 {
127 	int			 ttl = 255;
128 	int			 opt;
129 	struct listen_addr	*la;
130 	u_int			 cnt = 0;
131 
132 	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
133 		la->reconf = RECONF_NONE;
134 		cnt++;
135 
136 		if (la->flags & LISTENER_LISTENING)
137 			continue;
138 
139 		if (la->fd == -1) {
140 			log_warn("cannot establish listener on %s: invalid fd",
141 			    log_sockaddr((struct sockaddr *)&la->sa));
142 			continue;
143 		}
144 
145 		opt = 1;
146 		if (setsockopt(la->fd, IPPROTO_TCP, TCP_MD5SIG,
147 		    &opt, sizeof(opt)) == -1) {
148 			if (errno == ENOPROTOOPT) {	/* system w/o md5sig */
149 				log_warnx("md5sig not available, disabling");
150 				sysdep.no_md5sig = 1;
151 			} else
152 				fatal("setsockopt TCP_MD5SIG");
153 		}
154 
155 		/* set ttl to 255 so that ttl-security works */
156 		if (la->sa.ss_family == AF_INET && setsockopt(la->fd,
157 		    IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) {
158 			log_warn("setup_listeners setsockopt TTL");
159 			continue;
160 		}
161 
162 		session_socket_blockmode(la->fd, BM_NONBLOCK);
163 
164 		if (listen(la->fd, MAX_BACKLOG)) {
165 			close(la->fd);
166 			fatal("listen");
167 		}
168 
169 		la->flags |= LISTENER_LISTENING;
170 
171 		log_info("listening on %s",
172 		    log_sockaddr((struct sockaddr *)&la->sa));
173 	}
174 
175 	*la_cnt = cnt;
176 
177 	return (0);
178 }
179 
/*
 * Entry point of the session engine process.
 *
 * Forks.  In the parent the child's pid is returned right away.  The child
 * chroots into the home directory of BGPD_USER, drops privileges to that
 * user, installs its signal handlers, wires up the imsg channels on the
 * pipe ends it keeps (pipe_m2s[1], pipe_s2r[0], pipe_s2rctl[0]) and then
 * runs the poll loop until session_quit is set.  On the way out it stops
 * all sessions, releases its resources and calls _exit(0); the child never
 * returns to the caller.
 *
 * Every loop iteration
 *  - initializes, reinitializes or deletes peers (unless a reconfiguration
 *    is pending),
 *  - grows the peer_l, mrt_l and pfd arrays as needed,
 *  - rebuilds the poll set: fixed slots, listeners, peers, mrt dumps with
 *    queued output, control connections,
 *  - fires due peer timers and derives the poll timeout from them,
 *  - polls and dispatches whatever became ready.
 */
180 pid_t
181 session_main(int pipe_m2s[2], int pipe_s2r[2], int pipe_m2r[2],
182     int pipe_s2rctl[2])
183 {
184 	int			 nfds, timeout, pfkeysock;
185 	unsigned int		 i, j, idx_peers, idx_listeners, idx_mrts;
186 	pid_t			 pid;
187 	u_int			 pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0;
188 	u_int			 listener_cnt, ctl_cnt, mrt_cnt;
189 	u_int			 new_cnt;
190 	u_int32_t		 ctl_queued;
191 	struct passwd		*pw;
192 	struct peer		*p, **peer_l = NULL, *last, *next;
193 	struct mrt		*m, *xm, **mrt_l = NULL;
194 	struct pollfd		*pfd = NULL;
195 	struct ctl_conn		*ctl_conn;
196 	struct listen_addr	*la;
197 	void			*newp;
198 	short			 events;
199 
200 	switch (pid = fork()) {
201 	case -1:
202 		fatal("cannot fork");
203 	case 0:
204 		break;
205 	default:
206 		return (pid);
207 	}
208 
	/* only the child continues below */
209 	if ((pw = getpwnam(BGPD_USER)) == NULL)
210 		fatal(NULL);
211 
	/* confine the process to the home directory of BGPD_USER */
212 	if (chroot(pw->pw_dir) == -1)
213 		fatal("chroot");
214 	if (chdir("/") == -1)
215 		fatal("chdir(\"/\")");
216 
217 	setproctitle("session engine");
218 	bgpd_process = PROC_SE;
	/* open the pfkey socket before privileges are dropped below */
219 	pfkeysock = pfkey_init(&sysdep);
220 
221 	if (setgroups(1, &pw->pw_gid) ||
222 	    setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) ||
223 	    setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
224 		fatal("can't drop privileges");
225 
226 	signal(SIGTERM, session_sighdlr);
227 	signal(SIGINT, session_sighdlr);
228 	signal(SIGPIPE, SIG_IGN);
229 	signal(SIGHUP, SIG_IGN);
230 	signal(SIGALRM, SIG_IGN);
231 	signal(SIGUSR1, SIG_IGN);
232 
	/* close the pipe ends this process does not use */
233 	close(pipe_m2s[0]);
234 	close(pipe_s2r[1]);
235 	close(pipe_s2rctl[1]);
236 	close(pipe_m2r[0]);
237 	close(pipe_m2r[1]);
238 	if ((ibuf_rde = malloc(sizeof(struct imsgbuf))) == NULL ||
239 	    (ibuf_rde_ctl = malloc(sizeof(struct imsgbuf))) == NULL ||
240 	    (ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL)
241 		fatal(NULL);
242 	imsg_init(ibuf_rde, pipe_s2r[0]);
243 	imsg_init(ibuf_rde_ctl, pipe_s2rctl[0]);
244 	imsg_init(ibuf_main, pipe_m2s[1]);
245 
246 	TAILQ_INIT(&ctl_conns);
247 	LIST_INIT(&mrthead);
248 	listener_cnt = 0;
249 	peer_cnt = 0;
250 	ctl_cnt = 0;
251 
	/* start out with an empty configuration and no listeners */
252 	if ((conf = calloc(1, sizeof(struct bgpd_config))) == NULL)
253 		fatal(NULL);
254 	if ((conf->listen_addrs = calloc(1, sizeof(struct listen_addrs))) ==
255 	    NULL)
256 		fatal(NULL);
257 	TAILQ_INIT(conf->listen_addrs);
258 
259 	log_info("session engine ready");
260 
261 	while (session_quit == 0) {
262 		/* check for peers to be initialized or deleted */
263 		last = NULL;
264 		for (p = peers; p != NULL; p = next) {
			/* p may be freed below, so fetch its successor first */
265 			next = p->next;
266 			if (!pending_reconf) {
267 				/* cloned peer that idled out? */
268 				if (p->state == STATE_IDLE && p->conf.cloned &&
269 				    time(NULL) - p->stats.last_updown >=
270 				    INTERVAL_HOLD_CLONED)
271 					p->conf.reconf_action = RECONF_DELETE;
272 
273 				/* new peer that needs init? */
274 				if (p->state == STATE_NONE)
275 					init_peer(p);
276 
277 				/* reinit due? */
278 				if (p->conf.reconf_action == RECONF_REINIT) {
279 					session_stop(p, ERR_CEASE_ADMIN_RESET);
280 					if (!p->conf.down)
281 						timer_set(p, Timer_IdleHold, 0);
282 				}
283 
284 				/* deletion due? */
285 				if (p->conf.reconf_action == RECONF_DELETE) {
286 					if (p->demoted)
287 						session_demote(p, -1);
288 					p->conf.demote_group[0] = 0;
289 					session_stop(p, ERR_CEASE_PEER_UNCONF);
290 					log_peer_warnx(&p->conf, "removed");
					/* unlink p; last stays where it is */
291 					if (last != NULL)
292 						last->next = next;
293 					else
294 						peers = next;
295 					timer_remove_all(p);
296 					free(p);
297 					peer_cnt--;
298 					continue;
299 				}
300 				p->conf.reconf_action = RECONF_NONE;
301 			}
302 			last = p;
303 		}
304 
		/* peer_l maps poll slots back to peers; one entry per peer */
305 		if (peer_cnt > peer_l_elms) {
306 			if ((newp = realloc(peer_l, sizeof(struct peer *) *
307 			    peer_cnt)) == NULL) {
308 				/* panic for now  */
309 				log_warn("could not resize peer_l from %u -> %u"
310 				    " entries", peer_l_elms, peer_cnt);
311 				fatalx("exiting");
312 			}
313 			peer_l = newp;
314 			peer_l_elms = peer_cnt;
315 		}
316 
		/*
		 * drop mrt entries marked for removal and count the ones
		 * that have output queued
		 */
317 		mrt_cnt = 0;
318 		for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) {
319 			xm = LIST_NEXT(m, entry);
320 			if (m->state == MRT_STATE_REMOVE) {
321 				mrt_clean(m);
322 				LIST_REMOVE(m, entry);
323 				free(m);
324 				continue;
325 			}
326 			if (m->wbuf.queued)
327 				mrt_cnt++;
328 		}
329 
330 		if (mrt_cnt > mrt_l_elms) {
331 			if ((newp = realloc(mrt_l, sizeof(struct mrt *) *
332 			    mrt_cnt)) == NULL) {
333 				/* panic for now  */
334 				log_warn("could not resize mrt_l from %u -> %u"
335 				    " entries", mrt_l_elms, mrt_cnt);
336 				fatalx("exiting");
337 			}
338 			mrt_l = newp;
339 			mrt_l_elms = mrt_cnt;
340 		}
341 
		/* pfd needs a slot for every descriptor that may be polled */
342 		new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt +
343 		    ctl_cnt + mrt_cnt;
344 		if (new_cnt > pfd_elms) {
345 			if ((newp = realloc(pfd, sizeof(struct pollfd) *
346 			    new_cnt)) == NULL) {
347 				/* panic for now  */
348 				log_warn("could not resize pfd from %u -> %u"
349 				    " entries", pfd_elms, new_cnt);
350 				fatalx("exiting");
351 			}
352 			pfd = newp;
353 			pfd_elms = new_cnt;
354 		}
355 
		/* rebuild the poll set from scratch, fixed slots first */
356 		bzero(pfd, sizeof(struct pollfd) * pfd_elms);
357 		pfd[PFD_PIPE_MAIN].fd = ibuf_main->fd;
358 		pfd[PFD_PIPE_MAIN].events = POLLIN;
359 		if (ibuf_main->w.queued > 0)
360 			pfd[PFD_PIPE_MAIN].events |= POLLOUT;
361 		pfd[PFD_PIPE_ROUTE].fd = ibuf_rde->fd;
362 		pfd[PFD_PIPE_ROUTE].events = POLLIN;
363 		if (ibuf_rde->w.queued > 0)
364 			pfd[PFD_PIPE_ROUTE].events |= POLLOUT;
365 
		/* total output still queued towards control clients */
366 		ctl_queued = 0;
367 		TAILQ_FOREACH(ctl_conn, &ctl_conns, entry)
368 			ctl_queued += ctl_conn->ibuf.w.queued;
369 
370 		pfd[PFD_PIPE_ROUTE_CTL].fd = ibuf_rde_ctl->fd;
371 		if (ctl_queued < SESSION_CTL_QUEUE_MAX)
372 			/*
373 			 * Do not act as unlimited buffer. Don't read in more
374 			 * messages if the ctl sockets are getting full.
375 			 */
376 			pfd[PFD_PIPE_ROUTE_CTL].events = POLLIN;
		/* an fd of -1 makes poll() skip the slot while accept is paused */
377 		if (pauseaccept == 0) {
378 			pfd[PFD_SOCK_CTL].fd = csock;
379 			pfd[PFD_SOCK_CTL].events = POLLIN;
380 			pfd[PFD_SOCK_RCTL].fd = rcsock;
381 			pfd[PFD_SOCK_RCTL].events = POLLIN;
382 		} else {
383 			pfd[PFD_SOCK_CTL].fd = -1;
384 			pfd[PFD_SOCK_RCTL].fd = -1;
385 		}
386 		pfd[PFD_SOCK_PFKEY].fd = pfkeysock;
387 		pfd[PFD_SOCK_PFKEY].events = POLLIN;
388 
		/* one slot per listener, again disabled while accept is paused */
389 		i = PFD_LISTENERS_START;
390 		TAILQ_FOREACH(la, conf->listen_addrs, entry) {
391 			if (pauseaccept == 0) {
392 				pfd[i].fd = la->fd;
393 				pfd[i].events = POLLIN;
394 			} else
395 				pfd[i].fd = -1;
396 			i++;
397 		}
398 		idx_listeners = i;
399 		timeout = 240;	/* loop every 240s at least */
400 
		/* run due timers and add connected peers to the poll set */
401 		for (p = peers; p != NULL; p = p->next) {
402 			time_t	nextaction;
403 			struct peer_timer *pt;
404 
405 			/* check timers */
406 			if ((pt = timer_nextisdue(p)) != NULL) {
407 				switch (pt->type) {
408 				case Timer_Hold:
409 					bgp_fsm(p, EVNT_TIMER_HOLDTIME);
410 					break;
411 				case Timer_ConnectRetry:
412 					bgp_fsm(p, EVNT_TIMER_CONNRETRY);
413 					break;
414 				case Timer_Keepalive:
415 					bgp_fsm(p, EVNT_TIMER_KEEPALIVE);
416 					break;
417 				case Timer_IdleHold:
418 					bgp_fsm(p, EVNT_START);
419 					break;
420 				case Timer_IdleHoldReset:
					/*
					 * halve the idle hold time step by
					 * step until it is back at its
					 * initial value
					 */
421 					p->IdleHoldTime /= 2;
422 					if (p->IdleHoldTime <=
423 					    INTERVAL_IDLE_HOLD_INITIAL) {
424 						p->IdleHoldTime =
425 						    INTERVAL_IDLE_HOLD_INITIAL;
426 						timer_stop(p,
427 						    Timer_IdleHoldReset);
428 						p->errcnt = 0;
429 					} else
430 						timer_set(p,
431 						    Timer_IdleHoldReset,
432 						    p->IdleHoldTime);
433 					break;
434 				case Timer_CarpUndemote:
435 					timer_stop(p, Timer_CarpUndemote);
436 					if (p->demoted &&
437 					    p->state == STATE_ESTABLISHED)
438 						session_demote(p, -1);
439 					break;
440 				default:
441 					fatalx("King Bula lost in time");
442 				}
443 			}
			/* wake up no later than this peer's next timer */
444 			if ((nextaction = timer_nextduein(p)) != -1 &&
445 			    nextaction < timeout)
446 				timeout = nextaction;
447 
448 			/* are we waiting for a write? */
449 			events = POLLIN;
450 			if (p->wbuf.queued > 0 || p->state == STATE_CONNECT)
451 				events |= POLLOUT;
452 			/* is there still work to do? */
453 			if (p->rbuf && p->rbuf->wpos)
454 				timeout = 0;
455 
456 			/* poll events */
457 			if (p->fd != -1 && events != 0) {
458 				pfd[i].fd = p->fd;
459 				pfd[i].events = events;
460 				peer_l[i - idx_listeners] = p;
461 				i++;
462 			}
463 		}
464 
465 		idx_peers = i;
466 
		/* mrt dumps are only polled while they have output queued */
467 		LIST_FOREACH(m, &mrthead, entry)
468 			if (m->wbuf.queued) {
469 				pfd[i].fd = m->wbuf.fd;
470 				pfd[i].events = POLLOUT;
471 				mrt_l[i - idx_peers] = m;
472 				i++;
473 			}
474 
475 		idx_mrts = i;
476 
477 		TAILQ_FOREACH(ctl_conn, &ctl_conns, entry) {
478 			pfd[i].fd = ctl_conn->ibuf.fd;
479 			pfd[i].events = POLLIN;
480 			if (ctl_conn->ibuf.w.queued > 0)
481 				pfd[i].events |= POLLOUT;
482 			i++;
483 		}
484 
		/* timeout is in seconds; poll() takes milliseconds */
485 		if (pauseaccept && timeout > 1)
486 			timeout = 1;
487 		if (timeout < 0)
488 			timeout = 0;
489 		if ((nfds = poll(pfd, i, timeout * 1000)) == -1)
490 			if (errno != EINTR)
491 				fatal("poll error");
492 
493 		/*
494 		 * If we previously saw fd exhaustion, we stop accept()
495 		 * for 1 second to throttle the accept() loop.
496 		 */
497 		if (pauseaccept && getmonotime() > pauseaccept + 1)
498 			pauseaccept = 0;
499 
		/*
		 * Dispatch in slot order.  nfds counts the descriptors that
		 * are still unhandled, so the checks stop early once it
		 * reaches zero.
		 */
500 		if (nfds > 0 && pfd[PFD_PIPE_MAIN].revents & POLLOUT)
501 			if (msgbuf_write(&ibuf_main->w) < 0)
502 				fatal("pipe write error");
503 
504 		if (nfds > 0 && pfd[PFD_PIPE_MAIN].revents & POLLIN) {
505 			nfds--;
506 			session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN,
507 			    &listener_cnt);
508 		}
509 
510 		if (nfds > 0 && pfd[PFD_PIPE_ROUTE].revents & POLLOUT)
511 			if (msgbuf_write(&ibuf_rde->w) < 0)
512 				fatal("pipe write error");
513 
514 		if (nfds > 0 && pfd[PFD_PIPE_ROUTE].revents & POLLIN) {
515 			nfds--;
516 			session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE,
517 			    &listener_cnt);
518 		}
519 
520 		if (nfds > 0 && pfd[PFD_PIPE_ROUTE_CTL].revents & POLLIN) {
521 			nfds--;
522 			session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL,
523 			    &listener_cnt);
524 		}
525 
526 		if (nfds > 0 && pfd[PFD_SOCK_CTL].revents & POLLIN) {
527 			nfds--;
528 			ctl_cnt += control_accept(csock, 0);
529 		}
530 
531 		if (nfds > 0 && pfd[PFD_SOCK_RCTL].revents & POLLIN) {
532 			nfds--;
533 			ctl_cnt += control_accept(rcsock, 1);
534 		}
535 
536 		if (nfds > 0 && pfd[PFD_SOCK_PFKEY].revents & POLLIN) {
537 			nfds--;
538 			if (pfkey_read(pfkeysock, NULL) == -1) {
539 				log_warnx("pfkey_read failed, exiting...");
540 				session_quit = 1;
541 			}
542 		}
543 
		/* j keeps running across the following loops on purpose */
544 		for (j = PFD_LISTENERS_START; nfds > 0 && j < idx_listeners;
545 		    j++)
546 			if (pfd[j].revents & POLLIN) {
547 				nfds--;
548 				session_accept(pfd[j].fd);
549 			}
550 
551 		for (; nfds > 0 && j < idx_peers; j++)
552 			nfds -= session_dispatch_msg(&pfd[j],
553 			    peer_l[j - idx_listeners]);
554 
		/* work on buffered input of every peer, polled or not */
555 		for (p = peers; p != NULL; p = p->next)
556 			if (p->rbuf && p->rbuf->wpos)
557 				session_process_msg(p);
558 
559 		for (; nfds > 0 && j < idx_mrts; j++)
560 			if (pfd[j].revents & POLLOUT) {
561 				nfds--;
562 				mrt_write(mrt_l[j - idx_peers]);
563 			}
564 
565 		for (; nfds > 0 && j < i; j++)
566 			nfds -= control_dispatch_msg(&pfd[j], &ctl_cnt);
567 	}
568 
	/* shutting down: stop every session and release what was allocated */
569 	while ((p = peers) != NULL) {
570 		peers = p->next;
571 		session_stop(p, ERR_CEASE_ADMIN_DOWN);
572 		pfkey_remove(p);
573 		free(p);
574 	}
575 
576 	while ((m = LIST_FIRST(&mrthead)) != NULL) {
577 		mrt_clean(m);
578 		LIST_REMOVE(m, entry);
579 		free(m);
580 	}
581 
582 	while ((la = TAILQ_FIRST(conf->listen_addrs)) != NULL) {
583 		TAILQ_REMOVE(conf->listen_addrs, la, entry);
584 		free(la);
585 	}
586 	free(conf->listen_addrs);
587 	free(peer_l);
588 	free(mrt_l);
589 	free(pfd);
590 
	/*
	 * try once to push out what is still queued, then drop the rest
	 * NOTE(review): ibuf_rde_ctl and conf are not released here; that is
	 * harmless only because the process exits right below.
	 */
591 	msgbuf_write(&ibuf_rde->w);
592 	msgbuf_clear(&ibuf_rde->w);
593 	free(ibuf_rde);
594 	msgbuf_write(&ibuf_main->w);
595 	msgbuf_clear(&ibuf_main->w);
596 	free(ibuf_main);
597 
598 	control_shutdown(csock);
599 	control_shutdown(rcsock);
600 	log_info("session engine exiting");
601 	_exit(0);
602 }
603 
604 void
605 init_conf(struct bgpd_config *c)
606 {
607 	if (!c->holdtime)
608 		c->holdtime = INTERVAL_HOLD;
609 	if (!c->connectretry)
610 		c->connectretry = INTERVAL_CONNECTRETRY;
611 }
612 
613 void
614 init_peer(struct peer *p)
615 {
616 	TAILQ_INIT(&p->timers);
617 	p->fd = p->wbuf.fd = -1;
618 
619 	if (p->conf.if_depend[0])
620 		imsg_compose(ibuf_main, IMSG_IFINFO, 0, 0, -1,
621 		    p->conf.if_depend, sizeof(p->conf.if_depend));
622 	else
623 		p->depend_ok = 1;
624 
625 	peer_cnt++;
626 
627 	change_state(p, STATE_IDLE, EVNT_NONE);
628 	if (p->conf.down)
629 		timer_stop(p, Timer_IdleHold);		/* no autostart */
630 	else
631 		timer_set(p, Timer_IdleHold, 0);	/* start ASAP */
632 
633 	/*
634 	 * on startup, demote if requested.
635 	 * do not handle new peers. they must reach ESTABLISHED beforehands.
636 	 * peers added at runtime have reconf_action set to RECONF_REINIT.
637 	 */
638 	if (p->conf.reconf_action != RECONF_REINIT && p->conf.demote_group[0])
639 		session_demote(p, +1);
640 }
641 
642 void
643 bgp_fsm(struct peer *peer, enum session_events event)
644 {
645 	switch (peer->state) {
646 	case STATE_NONE:
647 		/* nothing */
648 		break;
649 	case STATE_IDLE:
650 		switch (event) {
651 		case EVNT_START:
652 			timer_stop(peer, Timer_Hold);
653 			timer_stop(peer, Timer_Keepalive);
654 			timer_stop(peer, Timer_IdleHold);
655 
656 			/* allocate read buffer */
657 			peer->rbuf = calloc(1, sizeof(struct ibuf_read));
658 			if (peer->rbuf == NULL)
659 				fatal(NULL);
660 
661 			/* init write buffer */
662 			msgbuf_init(&peer->wbuf);
663 
664 			/* init pfkey - remove old if any, load new ones */
665 			pfkey_remove(peer);
666 			if (pfkey_establish(peer) == -1) {
667 				log_peer_warnx(&peer->conf,
668 				    "pfkey setup failed");
669 				return;
670 			}
671 
672 			peer->stats.last_sent_errcode = 0;
673 			peer->stats.last_sent_suberr = 0;
674 
675 			if (!peer->depend_ok)
676 				timer_stop(peer, Timer_ConnectRetry);
677 			else if (peer->passive || peer->conf.passive ||
678 			    peer->conf.template) {
679 				change_state(peer, STATE_ACTIVE, event);
680 				timer_stop(peer, Timer_ConnectRetry);
681 			} else {
682 				change_state(peer, STATE_CONNECT, event);
683 				timer_set(peer, Timer_ConnectRetry,
684 				    conf->connectretry);
685 				session_connect(peer);
686 			}
687 			peer->passive = 0;
688 			break;
689 		default:
690 			/* ignore */
691 			break;
692 		}
693 		break;
694 	case STATE_CONNECT:
695 		switch (event) {
696 		case EVNT_START:
697 			/* ignore */
698 			break;
699 		case EVNT_CON_OPEN:
700 			session_tcp_established(peer);
701 			session_open(peer);
702 			timer_stop(peer, Timer_ConnectRetry);
703 			peer->holdtime = INTERVAL_HOLD_INITIAL;
704 			start_timer_holdtime(peer);
705 			change_state(peer, STATE_OPENSENT, event);
706 			break;
707 		case EVNT_CON_OPENFAIL:
708 			timer_set(peer, Timer_ConnectRetry,
709 			    conf->connectretry);
710 			session_close_connection(peer);
711 			change_state(peer, STATE_ACTIVE, event);
712 			break;
713 		case EVNT_TIMER_CONNRETRY:
714 			timer_set(peer, Timer_ConnectRetry,
715 			    conf->connectretry);
716 			session_connect(peer);
717 			break;
718 		default:
719 			change_state(peer, STATE_IDLE, event);
720 			break;
721 		}
722 		break;
723 	case STATE_ACTIVE:
724 		switch (event) {
725 		case EVNT_START:
726 			/* ignore */
727 			break;
728 		case EVNT_CON_OPEN:
729 			session_tcp_established(peer);
730 			session_open(peer);
731 			timer_stop(peer, Timer_ConnectRetry);
732 			peer->holdtime = INTERVAL_HOLD_INITIAL;
733 			start_timer_holdtime(peer);
734 			change_state(peer, STATE_OPENSENT, event);
735 			break;
736 		case EVNT_CON_OPENFAIL:
737 			timer_set(peer, Timer_ConnectRetry,
738 			    conf->connectretry);
739 			session_close_connection(peer);
740 			change_state(peer, STATE_ACTIVE, event);
741 			break;
742 		case EVNT_TIMER_CONNRETRY:
743 			timer_set(peer, Timer_ConnectRetry,
744 			    peer->holdtime);
745 			change_state(peer, STATE_CONNECT, event);
746 			session_connect(peer);
747 			break;
748 		default:
749 			change_state(peer, STATE_IDLE, event);
750 			break;
751 		}
752 		break;
753 	case STATE_OPENSENT:
754 		switch (event) {
755 		case EVNT_START:
756 			/* ignore */
757 			break;
758 		case EVNT_STOP:
759 			change_state(peer, STATE_IDLE, event);
760 			break;
761 		case EVNT_CON_CLOSED:
762 			session_close_connection(peer);
763 			timer_set(peer, Timer_ConnectRetry,
764 			    conf->connectretry);
765 			change_state(peer, STATE_ACTIVE, event);
766 			break;
767 		case EVNT_CON_FATAL:
768 			change_state(peer, STATE_IDLE, event);
769 			break;
770 		case EVNT_TIMER_HOLDTIME:
771 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
772 			    0, NULL, 0);
773 			change_state(peer, STATE_IDLE, event);
774 			break;
775 		case EVNT_RCVD_OPEN:
776 			/* parse_open calls change_state itself on failure */
777 			if (parse_open(peer))
778 				break;
779 			session_keepalive(peer);
780 			change_state(peer, STATE_OPENCONFIRM, event);
781 			break;
782 		case EVNT_RCVD_NOTIFICATION:
783 			if (parse_notification(peer)) {
784 				change_state(peer, STATE_IDLE, event);
785 				/* don't punish, capa negotiation */
786 				timer_set(peer, Timer_IdleHold, 0);
787 				peer->IdleHoldTime /= 2;
788 			} else
789 				change_state(peer, STATE_IDLE, event);
790 			break;
791 		default:
792 			session_notification(peer,
793 			    ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL, 0);
794 			change_state(peer, STATE_IDLE, event);
795 			break;
796 		}
797 		break;
798 	case STATE_OPENCONFIRM:
799 		switch (event) {
800 		case EVNT_START:
801 			/* ignore */
802 			break;
803 		case EVNT_STOP:
804 			change_state(peer, STATE_IDLE, event);
805 			break;
806 		case EVNT_CON_CLOSED:
807 		case EVNT_CON_FATAL:
808 			change_state(peer, STATE_IDLE, event);
809 			break;
810 		case EVNT_TIMER_HOLDTIME:
811 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
812 			    0, NULL, 0);
813 			change_state(peer, STATE_IDLE, event);
814 			break;
815 		case EVNT_TIMER_KEEPALIVE:
816 			session_keepalive(peer);
817 			break;
818 		case EVNT_RCVD_KEEPALIVE:
819 			start_timer_holdtime(peer);
820 			change_state(peer, STATE_ESTABLISHED, event);
821 			break;
822 		case EVNT_RCVD_NOTIFICATION:
823 			parse_notification(peer);
824 			change_state(peer, STATE_IDLE, event);
825 			break;
826 		default:
827 			session_notification(peer,
828 			    ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL, 0);
829 			change_state(peer, STATE_IDLE, event);
830 			break;
831 		}
832 		break;
833 	case STATE_ESTABLISHED:
834 		switch (event) {
835 		case EVNT_START:
836 			/* ignore */
837 			break;
838 		case EVNT_STOP:
839 			change_state(peer, STATE_IDLE, event);
840 			break;
841 		case EVNT_CON_CLOSED:
842 		case EVNT_CON_FATAL:
843 			change_state(peer, STATE_IDLE, event);
844 			break;
845 		case EVNT_TIMER_HOLDTIME:
846 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
847 			    0, NULL, 0);
848 			change_state(peer, STATE_IDLE, event);
849 			break;
850 		case EVNT_TIMER_KEEPALIVE:
851 			session_keepalive(peer);
852 			break;
853 		case EVNT_RCVD_KEEPALIVE:
854 			start_timer_holdtime(peer);
855 			break;
856 		case EVNT_RCVD_UPDATE:
857 			start_timer_holdtime(peer);
858 			if (parse_update(peer))
859 				change_state(peer, STATE_IDLE, event);
860 			else
861 				start_timer_holdtime(peer);
862 			break;
863 		case EVNT_RCVD_NOTIFICATION:
864 			parse_notification(peer);
865 			change_state(peer, STATE_IDLE, event);
866 			break;
867 		default:
868 			session_notification(peer,
869 			    ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL, 0);
870 			change_state(peer, STATE_IDLE, event);
871 			break;
872 		}
873 		break;
874 	}
875 }
876 
877 void
878 start_timer_holdtime(struct peer *peer)
879 {
880 	if (peer->holdtime > 0)
881 		timer_set(peer, Timer_Hold, peer->holdtime);
882 	else
883 		timer_stop(peer, Timer_Hold);
884 }
885 
886 void
887 start_timer_keepalive(struct peer *peer)
888 {
889 	if (peer->holdtime > 0)
890 		timer_set(peer, Timer_Keepalive, peer->holdtime / 3);
891 	else
892 		timer_stop(peer, Timer_Keepalive);
893 }
894 
895 void
896 session_close_connection(struct peer *peer)
897 {
898 	if (peer->fd != -1) {
899 		close(peer->fd);
900 		pauseaccept = 0;
901 	}
902 	peer->fd = peer->wbuf.fd = -1;
903 }
904 
905 void
906 change_state(struct peer *peer, enum session_state state,
907     enum session_events event)
908 {
909 	struct mrt	*mrt;
910 
911 	switch (state) {
912 	case STATE_IDLE:
913 		/* carp demotion first. new peers handled in init_peer */
914 		if (peer->state == STATE_ESTABLISHED &&
915 		    peer->conf.demote_group[0] && !peer->demoted)
916 			session_demote(peer, +1);
917 
918 		/*
919 		 * try to write out what's buffered (maybe a notification),
920 		 * don't bother if it fails
921 		 */
922 		if (peer->state >= STATE_OPENSENT && peer->wbuf.queued)
923 			msgbuf_write(&peer->wbuf);
924 
925 		/*
926 		 * we must start the timer for the next EVNT_START
927 		 * if we are coming here due to an error and the
928 		 * session was not established successfully before, the
929 		 * starttimerinterval needs to be exponentially increased
930 		 */
931 		if (peer->IdleHoldTime == 0)
932 			peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL;
933 		peer->holdtime = INTERVAL_HOLD_INITIAL;
934 		timer_stop(peer, Timer_ConnectRetry);
935 		timer_stop(peer, Timer_Keepalive);
936 		timer_stop(peer, Timer_Hold);
937 		timer_stop(peer, Timer_IdleHold);
938 		timer_stop(peer, Timer_IdleHoldReset);
939 		session_close_connection(peer);
940 		msgbuf_clear(&peer->wbuf);
941 		free(peer->rbuf);
942 		peer->rbuf = NULL;
943 		bzero(&peer->capa.peer, sizeof(peer->capa.peer));
944 		if (peer->state == STATE_ESTABLISHED)
945 			session_down(peer);
946 		if (event != EVNT_STOP) {
947 			timer_set(peer, Timer_IdleHold, peer->IdleHoldTime);
948 			if (event != EVNT_NONE &&
949 			    peer->IdleHoldTime < MAX_IDLE_HOLD/2)
950 				peer->IdleHoldTime *= 2;
951 		}
952 		if (peer->state == STATE_NONE ||
953 		    peer->state == STATE_ESTABLISHED) {
954 			/* initialize capability negotiation structures */
955 			memcpy(&peer->capa.ann, &peer->conf.capabilities,
956 			    sizeof(peer->capa.ann));
957 			if (!peer->conf.announce_capa)
958 				session_capa_ann_none(peer);
959 		}
960 		break;
961 	case STATE_CONNECT:
962 		break;
963 	case STATE_ACTIVE:
964 		break;
965 	case STATE_OPENSENT:
966 		break;
967 	case STATE_OPENCONFIRM:
968 		break;
969 	case STATE_ESTABLISHED:
970 		timer_set(peer, Timer_IdleHoldReset, peer->IdleHoldTime);
971 		if (peer->demoted)
972 			timer_set(peer, Timer_CarpUndemote,
973 			    INTERVAL_HOLD_DEMOTED);
974 		session_up(peer);
975 		break;
976 	default:		/* something seriously fucked */
977 		break;
978 	}
979 
980 	log_statechange(peer, state, event);
981 	LIST_FOREACH(mrt, &mrthead, entry) {
982 		if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT))
983 			continue;
984 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
985 		    mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
986 		    mrt->group_id == peer->conf.groupid))
987 			mrt_dump_state(mrt, peer->state, state, peer);
988 	}
989 	peer->prev_state = peer->state;
990 	peer->state = state;
991 }
992 
993 void
994 session_accept(int listenfd)
995 {
996 	int			 connfd;
997 	int			 opt;
998 	socklen_t		 len;
999 	struct sockaddr_storage	 cliaddr;
1000 	struct peer		*p = NULL;
1001 
1002 	len = sizeof(cliaddr);
1003 	if ((connfd = accept(listenfd,
1004 	    (struct sockaddr *)&cliaddr, &len)) == -1) {
1005 		if (errno == ENFILE || errno == EMFILE) {
1006 			pauseaccept = getmonotime();
1007 			return;
1008 		} else if (errno == EWOULDBLOCK || errno == EINTR)
1009 			return;
1010 		else
1011 			log_warn("accept");
1012 	}
1013 
1014 	p = getpeerbyip((struct sockaddr *)&cliaddr);
1015 
1016 	if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) {
1017 		if (timer_running(p, Timer_IdleHold, NULL)) {
1018 			/* fast reconnect after clear */
1019 			p->passive = 1;
1020 			bgp_fsm(p, EVNT_START);
1021 		}
1022 	}
1023 
1024 	if (p != NULL &&
1025 	    (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) {
1026 		if (p->fd != -1) {
1027 			if (p->state == STATE_CONNECT)
1028 				session_close_connection(p);
1029 			else {
1030 				close(connfd);
1031 				return;
1032 			}
1033 		}
1034 
1035 		if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1036 			log_peer_warnx(&p->conf,
1037 			    "ipsec or md5sig configured but not available");
1038 			close(connfd);
1039 			return;
1040 		}
1041 
1042 		if (p->conf.auth.method == AUTH_MD5SIG) {
1043 			if (sysdep.no_md5sig) {
1044 				log_peer_warnx(&p->conf,
1045 				    "md5sig configured but not available");
1046 				close(connfd);
1047 				return;
1048 			}
1049 			len = sizeof(opt);
1050 			if (getsockopt(connfd, IPPROTO_TCP, TCP_MD5SIG,
1051 			    &opt, &len) == -1)
1052 				fatal("getsockopt TCP_MD5SIG");
1053 			if (!opt) {	/* non-md5'd connection! */
1054 				log_peer_warnx(&p->conf,
1055 				    "connection attempt without md5 signature");
1056 				close(connfd);
1057 				return;
1058 			}
1059 		}
1060 		p->fd = p->wbuf.fd = connfd;
1061 		if (session_setup_socket(p)) {
1062 			close(connfd);
1063 			return;
1064 		}
1065 		session_socket_blockmode(connfd, BM_NONBLOCK);
1066 		bgp_fsm(p, EVNT_CON_OPEN);
1067 	} else {
1068 		log_conn_attempt(p, (struct sockaddr *)&cliaddr);
1069 		close(connfd);
1070 	}
1071 }
1072 
1073 int
1074 session_connect(struct peer *peer)
1075 {
1076 	int			 opt = 1;
1077 	struct sockaddr		*sa;
1078 
1079 	/*
1080 	 * we do not need the overcomplicated collision detection RFC 1771
1081 	 * describes; we simply make sure there is only ever one concurrent
1082 	 * tcp connection per peer.
1083 	 */
1084 	if (peer->fd != -1)
1085 		return (-1);
1086 
1087 	if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid), SOCK_STREAM,
1088 	    IPPROTO_TCP)) == -1) {
1089 		log_peer_warn(&peer->conf, "session_connect socket");
1090 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1091 		return (-1);
1092 	}
1093 
1094 	if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1095 		log_peer_warnx(&peer->conf,
1096 		    "ipsec or md5sig configured but not available");
1097 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1098 		return (-1);
1099 	}
1100 
1101 	if (peer->conf.auth.method == AUTH_MD5SIG) {
1102 		if (sysdep.no_md5sig) {
1103 			log_peer_warnx(&peer->conf,
1104 			    "md5sig configured but not available");
1105 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1106 			return (-1);
1107 		}
1108 		if (setsockopt(peer->fd, IPPROTO_TCP, TCP_MD5SIG,
1109 		    &opt, sizeof(opt)) == -1) {
1110 			log_peer_warn(&peer->conf, "setsockopt md5sig");
1111 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1112 			return (-1);
1113 		}
1114 	}
1115 	peer->wbuf.fd = peer->fd;
1116 
1117 	/* if update source is set we need to bind() */
1118 	if ((sa = addr2sa(&peer->conf.local_addr, 0)) != NULL) {
1119 		if (bind(peer->fd, sa, sa->sa_len) == -1) {
1120 			log_peer_warn(&peer->conf, "session_connect bind");
1121 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1122 			return (-1);
1123 		}
1124 	}
1125 
1126 	if (session_setup_socket(peer)) {
1127 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1128 		return (-1);
1129 	}
1130 
1131 	session_socket_blockmode(peer->fd, BM_NONBLOCK);
1132 
1133 	sa = addr2sa(&peer->conf.remote_addr, BGP_PORT);
1134 	if (connect(peer->fd, sa, sa->sa_len) == -1) {
1135 		if (errno != EINPROGRESS) {
1136 			if (errno != peer->lasterr)
1137 				log_peer_warn(&peer->conf, "connect");
1138 			peer->lasterr = errno;
1139 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1140 			return (-1);
1141 		}
1142 	} else
1143 		bgp_fsm(peer, EVNT_CON_OPEN);
1144 
1145 	return (0);
1146 }
1147 
1148 int
1149 session_setup_socket(struct peer *p)
1150 {
1151 	int	ttl = p->conf.distance;
1152 	int	pre = IPTOS_PREC_INTERNETCONTROL;
1153 	int	nodelay = 1;
1154 	int	bsize;
1155 
1156 	switch (p->conf.remote_addr.aid) {
1157 	case AID_INET:
1158 		/* set precedence, see RFC 1771 appendix 5 */
1159 		if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) ==
1160 		    -1) {
1161 			log_peer_warn(&p->conf,
1162 			    "session_setup_socket setsockopt TOS");
1163 			return (-1);
1164 		}
1165 
1166 		if (p->conf.ebgp) {
1167 			/* set TTL to foreign router's distance
1168 			   1=direct n=multihop with ttlsec, we always use 255 */
1169 			if (p->conf.ttlsec) {
1170 				ttl = 256 - p->conf.distance;
1171 				if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL,
1172 				    &ttl, sizeof(ttl)) == -1) {
1173 					log_peer_warn(&p->conf,
1174 					    "session_setup_socket: "
1175 					    "setsockopt MINTTL");
1176 					return (-1);
1177 				}
1178 				ttl = 255;
1179 			}
1180 
1181 			if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl,
1182 			    sizeof(ttl)) == -1) {
1183 				log_peer_warn(&p->conf,
1184 				    "session_setup_socket setsockopt TTL");
1185 				return (-1);
1186 			}
1187 		}
1188 		break;
1189 	case AID_INET6:
1190 		if (p->conf.ebgp) {
1191 			/* set hoplimit to foreign router's distance */
1192 			if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
1193 			    &ttl, sizeof(ttl)) == -1) {
1194 				log_peer_warn(&p->conf,
1195 				    "session_setup_socket setsockopt hoplimit");
1196 				return (-1);
1197 			}
1198 		}
1199 		break;
1200 	}
1201 
1202 	/* set TCP_NODELAY */
1203 	if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay,
1204 	    sizeof(nodelay)) == -1) {
1205 		log_peer_warn(&p->conf,
1206 		    "session_setup_socket setsockopt TCP_NODELAY");
1207 		return (-1);
1208 	}
1209 
1210 	/* only increase bufsize (and thus window) if md5 or ipsec is in use */
1211 	if (p->conf.auth.method != AUTH_NONE) {
1212 		/* try to increase bufsize. no biggie if it fails */
1213 		bsize = 65535;
1214 		while (bsize > 8192 &&
1215 		    setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize,
1216 		    sizeof(bsize)) == -1 && errno != EINVAL)
1217 			bsize /= 2;
1218 		bsize = 65535;
1219 		while (bsize > 8192 &&
1220 		    setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize,
1221 		    sizeof(bsize)) == -1 && errno != EINVAL)
1222 			bsize /= 2;
1223 	}
1224 
1225 	return (0);
1226 }
1227 
1228 void
1229 session_socket_blockmode(int fd, enum blockmodes bm)
1230 {
1231 	int	flags;
1232 
1233 	if ((flags = fcntl(fd, F_GETFL, 0)) == -1)
1234 		fatal("fcntl F_GETFL");
1235 
1236 	if (bm == BM_NONBLOCK)
1237 		flags |= O_NONBLOCK;
1238 	else
1239 		flags &= ~O_NONBLOCK;
1240 
1241 	if ((flags = fcntl(fd, F_SETFL, flags)) == -1)
1242 		fatal("fcntl F_SETFL");
1243 }
1244 
1245 void
1246 session_tcp_established(struct peer *peer)
1247 {
1248 	socklen_t	len;
1249 
1250 	len = sizeof(peer->sa_local);
1251 	if (getsockname(peer->fd, (struct sockaddr *)&peer->sa_local,
1252 	    &len) == -1)
1253 		log_warn("getsockname");
1254 	len = sizeof(peer->sa_remote);
1255 	if (getpeername(peer->fd, (struct sockaddr *)&peer->sa_remote,
1256 	    &len) == -1)
1257 		log_warn("getpeername");
1258 }
1259 
1260 void
1261 session_capa_ann_none(struct peer *peer)
1262 {
1263 	bzero(&peer->capa.ann, sizeof(peer->capa.ann));
1264 }
1265 
/*
 * Append a capability header (one byte code, one byte length) to opb.
 * Returns the sum of the ibuf_add() results; callers treat any non-zero
 * value as failure.
 */
int
session_capa_add(struct ibuf *opb, u_int8_t capa_code, u_int8_t capa_len)
{
	int	failed;

	failed = ibuf_add(opb, &capa_code, sizeof(capa_code));
	failed += ibuf_add(opb, &capa_len, sizeof(capa_len));

	return (failed);
}
1275 
/*
 * Append the 4 byte multiprotocol capability value for aid to buf:
 * AFI (network byte order), one zero byte, SAFI.
 * An aid that cannot be mapped to an AFI/SAFI pair is fatal.
 * Returns the sum of the ibuf_add() results; non-zero means failure.
 */
int
session_capa_add_mp(struct ibuf *buf, u_int8_t aid)
{
	u_int16_t		 net_afi;
	u_int8_t		 safi;
	u_int8_t		 reserved = 0;
	int			 failed = 0;

	if (aid2afi(aid, &net_afi, &safi) == -1)
		fatalx("session_capa_add_mp: bad afi/safi pair");
	net_afi = htons(net_afi);

	failed += ibuf_add(buf, &net_afi, sizeof(net_afi));
	failed += ibuf_add(buf, &reserved, sizeof(reserved));
	failed += ibuf_add(buf, &safi, sizeof(safi));

	return (failed);
}
1292 
1293 struct bgp_msg *
1294 session_newmsg(enum msg_type msgtype, u_int16_t len)
1295 {
1296 	struct bgp_msg		*msg;
1297 	struct msg_header	 hdr;
1298 	struct ibuf		*buf;
1299 	int			 errs = 0;
1300 
1301 	memset(&hdr.marker, 0xff, sizeof(hdr.marker));
1302 	hdr.len = htons(len);
1303 	hdr.type = msgtype;
1304 
1305 	if ((buf = ibuf_open(len)) == NULL)
1306 		return (NULL);
1307 
1308 	errs += ibuf_add(buf, &hdr.marker, sizeof(hdr.marker));
1309 	errs += ibuf_add(buf, &hdr.len, sizeof(hdr.len));
1310 	errs += ibuf_add(buf, &hdr.type, sizeof(hdr.type));
1311 
1312 	if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) {
1313 		ibuf_free(buf);
1314 		return (NULL);
1315 	}
1316 
1317 	msg->buf = buf;
1318 	msg->type = msgtype;
1319 	msg->len = len;
1320 
1321 	return (msg);
1322 }
1323 
1324 int
1325 session_sendmsg(struct bgp_msg *msg, struct peer *p)
1326 {
1327 	struct mrt		*mrt;
1328 
1329 	LIST_FOREACH(mrt, &mrthead, entry) {
1330 		if (!(mrt->type == MRT_ALL_OUT || (msg->type == UPDATE &&
1331 		    mrt->type == MRT_UPDATE_OUT)))
1332 			continue;
1333 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1334 		    mrt->peer_id == p->conf.id || (mrt->group_id == 0 &&
1335 		    mrt->group_id == p->conf.groupid))
1336 			mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p);
1337 	}
1338 
1339 	ibuf_close(&p->wbuf, msg->buf);
1340 	free(msg);
1341 	return (0);
1342 }
1343 
1344 void
1345 session_open(struct peer *p)
1346 {
1347 	struct bgp_msg		*buf;
1348 	struct ibuf		*opb;
1349 	struct msg_open		 msg;
1350 	u_int16_t		 len;
1351 	u_int8_t		 i, op_type, optparamlen = 0;
1352 	int			 errs = 0;
1353 
1354 
1355 	if ((opb = ibuf_dynamic(0, UCHAR_MAX - sizeof(op_type) -
1356 	    sizeof(optparamlen))) == NULL) {
1357 		bgp_fsm(p, EVNT_CON_FATAL);
1358 		return;
1359 	}
1360 
1361 	/* multiprotocol extensions, RFC 4760 */
1362 	for (i = 0; i < AID_MAX; i++)
1363 		if (p->capa.ann.mp[i]) {	/* 4 bytes data */
1364 			errs += session_capa_add(opb, CAPA_MP, 4);
1365 			errs += session_capa_add_mp(opb, i);
1366 		}
1367 
1368 	/* route refresh, RFC 2918 */
1369 	if (p->capa.ann.refresh)	/* no data */
1370 		errs += session_capa_add(opb, CAPA_REFRESH, 0);
1371 
1372 	/* End-of-RIB marker, RFC 4724 */
1373 	if (p->capa.ann.restart) {	/* 2 bytes data */
1374 		u_char		c[2];
1375 
1376 		c[0] = 0x80; /* we're always restarting */
1377 		c[1] = 0;
1378 		errs += session_capa_add(opb, CAPA_RESTART, 2);
1379 		errs += ibuf_add(opb, &c, 2);
1380 	}
1381 
1382 	/* 4-bytes AS numbers, draft-ietf-idr-as4bytes-13 */
1383 	if (p->capa.ann.as4byte) {	/* 4 bytes data */
1384 		u_int32_t	nas;
1385 
1386 		nas = htonl(conf->as);
1387 		errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(nas));
1388 		errs += ibuf_add(opb, &nas, sizeof(nas));
1389 	}
1390 
1391 	if (ibuf_size(opb))
1392 		optparamlen = ibuf_size(opb) + sizeof(op_type) +
1393 		    sizeof(optparamlen);
1394 
1395 	len = MSGSIZE_OPEN_MIN + optparamlen;
1396 	if (errs || (buf = session_newmsg(OPEN, len)) == NULL) {
1397 		ibuf_free(opb);
1398 		bgp_fsm(p, EVNT_CON_FATAL);
1399 		return;
1400 	}
1401 
1402 	msg.version = 4;
1403 	msg.myas = htons(conf->short_as);
1404 	if (p->conf.holdtime)
1405 		msg.holdtime = htons(p->conf.holdtime);
1406 	else
1407 		msg.holdtime = htons(conf->holdtime);
1408 	msg.bgpid = conf->bgpid;	/* is already in network byte order */
1409 	msg.optparamlen = optparamlen;
1410 
1411 	errs += ibuf_add(buf->buf, &msg.version, sizeof(msg.version));
1412 	errs += ibuf_add(buf->buf, &msg.myas, sizeof(msg.myas));
1413 	errs += ibuf_add(buf->buf, &msg.holdtime, sizeof(msg.holdtime));
1414 	errs += ibuf_add(buf->buf, &msg.bgpid, sizeof(msg.bgpid));
1415 	errs += ibuf_add(buf->buf, &msg.optparamlen, sizeof(msg.optparamlen));
1416 
1417 	if (optparamlen) {
1418 		op_type = OPT_PARAM_CAPABILITIES;
1419 		optparamlen = ibuf_size(opb);
1420 		errs += ibuf_add(buf->buf, &op_type, sizeof(op_type));
1421 		errs += ibuf_add(buf->buf, &optparamlen, sizeof(optparamlen));
1422 		errs += ibuf_add(buf->buf, opb->buf, ibuf_size(opb));
1423 	}
1424 
1425 	ibuf_free(opb);
1426 
1427 	if (errs) {
1428 		ibuf_free(buf->buf);
1429 		free(buf);
1430 		bgp_fsm(p, EVNT_CON_FATAL);
1431 		return;
1432 	}
1433 
1434 	if (session_sendmsg(buf, p) == -1) {
1435 		bgp_fsm(p, EVNT_CON_FATAL);
1436 		return;
1437 	}
1438 
1439 	p->stats.msg_sent_open++;
1440 }
1441 
1442 void
1443 session_keepalive(struct peer *p)
1444 {
1445 	struct bgp_msg		*buf;
1446 
1447 	if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL ||
1448 	    session_sendmsg(buf, p) == -1) {
1449 		bgp_fsm(p, EVNT_CON_FATAL);
1450 		return;
1451 	}
1452 
1453 	start_timer_keepalive(p);
1454 	p->stats.msg_sent_keepalive++;
1455 }
1456 
1457 void
1458 session_update(u_int32_t peerid, void *data, size_t datalen)
1459 {
1460 	struct peer		*p;
1461 	struct bgp_msg		*buf;
1462 
1463 	if ((p = getpeerbyid(peerid)) == NULL) {
1464 		log_warnx("no such peer: id=%u", peerid);
1465 		return;
1466 	}
1467 
1468 	if (p->state != STATE_ESTABLISHED)
1469 		return;
1470 
1471 	if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + datalen)) == NULL) {
1472 		bgp_fsm(p, EVNT_CON_FATAL);
1473 		return;
1474 	}
1475 
1476 	if (ibuf_add(buf->buf, data, datalen)) {
1477 		ibuf_free(buf->buf);
1478 		free(buf);
1479 		bgp_fsm(p, EVNT_CON_FATAL);
1480 		return;
1481 	}
1482 
1483 	if (session_sendmsg(buf, p) == -1) {
1484 		bgp_fsm(p, EVNT_CON_FATAL);
1485 		return;
1486 	}
1487 
1488 	start_timer_keepalive(p);
1489 	p->stats.msg_sent_update++;
1490 }
1491 
1492 void
1493 session_notification(struct peer *p, u_int8_t errcode, u_int8_t subcode,
1494     void *data, ssize_t datalen)
1495 {
1496 	struct bgp_msg		*buf;
1497 	int			 errs = 0;
1498 
1499 	if (p->stats.last_sent_errcode)	/* some notification already sent */
1500 		return;
1501 
1502 	log_notification(p, errcode, subcode, data, datalen, "sending");
1503 
1504 	if ((buf = session_newmsg(NOTIFICATION,
1505 	    MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) {
1506 		bgp_fsm(p, EVNT_CON_FATAL);
1507 		return;
1508 	}
1509 
1510 	errs += ibuf_add(buf->buf, &errcode, sizeof(errcode));
1511 	errs += ibuf_add(buf->buf, &subcode, sizeof(subcode));
1512 
1513 	if (datalen > 0)
1514 		errs += ibuf_add(buf->buf, data, datalen);
1515 
1516 	if (errs) {
1517 		ibuf_free(buf->buf);
1518 		free(buf);
1519 		bgp_fsm(p, EVNT_CON_FATAL);
1520 		return;
1521 	}
1522 
1523 	if (session_sendmsg(buf, p) == -1) {
1524 		bgp_fsm(p, EVNT_CON_FATAL);
1525 		return;
1526 	}
1527 
1528 	p->stats.msg_sent_notification++;
1529 	p->stats.last_sent_errcode = errcode;
1530 	p->stats.last_sent_suberr = subcode;
1531 }
1532 
1533 int
1534 session_neighbor_rrefresh(struct peer *p)
1535 {
1536 	u_int8_t	i;
1537 
1538 	if (!p->capa.peer.refresh)
1539 		return (-1);
1540 
1541 	for (i = 0; i < AID_MAX; i++) {
1542 		if (p->capa.peer.mp[i] != 0)
1543 			session_rrefresh(p, i);
1544 	}
1545 
1546 	return (0);
1547 }
1548 
1549 void
1550 session_rrefresh(struct peer *p, u_int8_t aid)
1551 {
1552 	struct bgp_msg		*buf;
1553 	int			 errs = 0;
1554 	u_int16_t		 afi;
1555 	u_int8_t		 safi, null8 = 0;
1556 
1557 	if (aid2afi(aid, &afi, &safi) == -1)
1558 		fatalx("session_rrefresh: bad afi/safi pair");
1559 
1560 	if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) {
1561 		bgp_fsm(p, EVNT_CON_FATAL);
1562 		return;
1563 	}
1564 
1565 	afi = htons(afi);
1566 	errs += ibuf_add(buf->buf, &afi, sizeof(afi));
1567 	errs += ibuf_add(buf->buf, &null8, sizeof(null8));
1568 	errs += ibuf_add(buf->buf, &safi, sizeof(safi));
1569 
1570 	if (errs) {
1571 		ibuf_free(buf->buf);
1572 		free(buf);
1573 		bgp_fsm(p, EVNT_CON_FATAL);
1574 		return;
1575 	}
1576 
1577 	if (session_sendmsg(buf, p) == -1) {
1578 		bgp_fsm(p, EVNT_CON_FATAL);
1579 		return;
1580 	}
1581 
1582 	p->stats.msg_sent_rrefresh++;
1583 }
1584 
1585 int
1586 session_dispatch_msg(struct pollfd *pfd, struct peer *p)
1587 {
1588 	ssize_t		n;
1589 	socklen_t	len;
1590 	int		error;
1591 
1592 	if (p->state == STATE_CONNECT) {
1593 		if (pfd->revents & POLLOUT) {
1594 			if (pfd->revents & POLLIN) {
1595 				/* error occurred */
1596 				len = sizeof(error);
1597 				if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR,
1598 				    &error, &len) == -1 || error) {
1599 					if (error)
1600 						errno = error;
1601 					if (errno != p->lasterr) {
1602 						log_peer_warn(&p->conf,
1603 						    "socket error");
1604 						p->lasterr = errno;
1605 					}
1606 					bgp_fsm(p, EVNT_CON_OPENFAIL);
1607 					return (1);
1608 				}
1609 			}
1610 			bgp_fsm(p, EVNT_CON_OPEN);
1611 			return (1);
1612 		}
1613 		if (pfd->revents & POLLHUP) {
1614 			bgp_fsm(p, EVNT_CON_OPENFAIL);
1615 			return (1);
1616 		}
1617 		if (pfd->revents & (POLLERR|POLLNVAL)) {
1618 			bgp_fsm(p, EVNT_CON_FATAL);
1619 			return (1);
1620 		}
1621 		return (0);
1622 	}
1623 
1624 	if (pfd->revents & POLLHUP) {
1625 		bgp_fsm(p, EVNT_CON_CLOSED);
1626 		return (1);
1627 	}
1628 	if (pfd->revents & (POLLERR|POLLNVAL)) {
1629 		bgp_fsm(p, EVNT_CON_FATAL);
1630 		return (1);
1631 	}
1632 
1633 	if (pfd->revents & POLLOUT && p->wbuf.queued) {
1634 		if ((error = msgbuf_write(&p->wbuf)) < 0) {
1635 			if (error == -2)
1636 				log_peer_warnx(&p->conf, "Connection closed");
1637 			else
1638 				log_peer_warn(&p->conf, "write error");
1639 			bgp_fsm(p, EVNT_CON_FATAL);
1640 			return (1);
1641 		}
1642 		if (!(pfd->revents & POLLIN))
1643 			return (1);
1644 	}
1645 
1646 	if (p->rbuf && pfd->revents & POLLIN) {
1647 		if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos,
1648 		    sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) {
1649 			if (errno != EINTR && errno != EAGAIN) {
1650 				log_peer_warn(&p->conf, "read error");
1651 				bgp_fsm(p, EVNT_CON_FATAL);
1652 			}
1653 			return (1);
1654 		}
1655 		if (n == 0) {	/* connection closed */
1656 			bgp_fsm(p, EVNT_CON_CLOSED);
1657 			return (1);
1658 		}
1659 
1660 		p->rbuf->wpos += n;
1661 		p->stats.last_read = time(NULL);
1662 		return (1);
1663 	}
1664 	return (0);
1665 }
1666 
1667 int
1668 session_process_msg(struct peer *p)
1669 {
1670 	ssize_t		rpos, av, left;
1671 	int		processed = 0;
1672 	u_int16_t	msglen;
1673 	u_int8_t	msgtype;
1674 
1675 	rpos = 0;
1676 	av = p->rbuf->wpos;
1677 
1678 	/*
1679 	 * session might drop to IDLE -> buffers deallocated
1680 	 * we MUST check rbuf != NULL before use
1681 	 */
1682 	for (;;) {
1683 		if (rpos + MSGSIZE_HEADER > av)
1684 			break;
1685 		if (p->rbuf == NULL)
1686 			break;
1687 		if (parse_header(p, p->rbuf->buf + rpos, &msglen,
1688 		    &msgtype) == -1)
1689 			return (0);
1690 		if (rpos + msglen > av)
1691 			break;
1692 		p->rbuf->rptr = p->rbuf->buf + rpos;
1693 
1694 		switch (msgtype) {
1695 		case OPEN:
1696 			bgp_fsm(p, EVNT_RCVD_OPEN);
1697 			p->stats.msg_rcvd_open++;
1698 			break;
1699 		case UPDATE:
1700 			bgp_fsm(p, EVNT_RCVD_UPDATE);
1701 			p->stats.msg_rcvd_update++;
1702 			break;
1703 		case NOTIFICATION:
1704 			bgp_fsm(p, EVNT_RCVD_NOTIFICATION);
1705 			p->stats.msg_rcvd_notification++;
1706 			break;
1707 		case KEEPALIVE:
1708 			bgp_fsm(p, EVNT_RCVD_KEEPALIVE);
1709 			p->stats.msg_rcvd_keepalive++;
1710 			break;
1711 		case RREFRESH:
1712 			parse_refresh(p);
1713 			p->stats.msg_rcvd_rrefresh++;
1714 			break;
1715 		default:	/* cannot happen */
1716 			session_notification(p, ERR_HEADER, ERR_HDR_TYPE,
1717 			    &msgtype, 1);
1718 			log_warnx("received message with unknown type %u",
1719 			    msgtype);
1720 			bgp_fsm(p, EVNT_CON_FATAL);
1721 		}
1722 		rpos += msglen;
1723 		if (++processed > MSG_PROCESS_LIMIT)
1724 			break;
1725 	}
1726 	if (p->rbuf == NULL)
1727 		return (1);
1728 
1729 	if (rpos < av) {
1730 		left = av - rpos;
1731 		memcpy(&p->rbuf->buf, p->rbuf->buf + rpos, left);
1732 		p->rbuf->wpos = left;
1733 	} else
1734 		p->rbuf->wpos = 0;
1735 
1736 	return (1);
1737 }
1738 
1739 int
1740 parse_header(struct peer *peer, u_char *data, u_int16_t *len, u_int8_t *type)
1741 {
1742 	struct mrt		*mrt;
1743 	u_char			*p;
1744 	u_int16_t		 olen;
1745 	static const u_int8_t	 marker[MSGSIZE_HEADER_MARKER] = { 0xff, 0xff,
1746 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1747 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
1748 
1749 	/* caller MUST make sure we are getting 19 bytes! */
1750 	p = data;
1751 	if (memcmp(p, marker, sizeof(marker))) {
1752 		log_peer_warnx(&peer->conf, "sync error");
1753 		session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL, 0);
1754 		bgp_fsm(peer, EVNT_CON_FATAL);
1755 		return (-1);
1756 	}
1757 	p += MSGSIZE_HEADER_MARKER;
1758 
1759 	memcpy(&olen, p, 2);
1760 	*len = ntohs(olen);
1761 	p += 2;
1762 	memcpy(type, p, 1);
1763 
1764 	if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) {
1765 		log_peer_warnx(&peer->conf,
1766 		    "received message: illegal length: %u byte", *len);
1767 		session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1768 		    &olen, sizeof(olen));
1769 		bgp_fsm(peer, EVNT_CON_FATAL);
1770 		return (-1);
1771 	}
1772 
1773 	switch (*type) {
1774 	case OPEN:
1775 		if (*len < MSGSIZE_OPEN_MIN) {
1776 			log_peer_warnx(&peer->conf,
1777 			    "received OPEN: illegal len: %u byte", *len);
1778 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1779 			    &olen, sizeof(olen));
1780 			bgp_fsm(peer, EVNT_CON_FATAL);
1781 			return (-1);
1782 		}
1783 		break;
1784 	case NOTIFICATION:
1785 		if (*len < MSGSIZE_NOTIFICATION_MIN) {
1786 			log_peer_warnx(&peer->conf,
1787 			    "received NOTIFICATION: illegal len: %u byte",
1788 			    *len);
1789 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1790 			    &olen, sizeof(olen));
1791 			bgp_fsm(peer, EVNT_CON_FATAL);
1792 			return (-1);
1793 		}
1794 		break;
1795 	case UPDATE:
1796 		if (*len < MSGSIZE_UPDATE_MIN) {
1797 			log_peer_warnx(&peer->conf,
1798 			    "received UPDATE: illegal len: %u byte", *len);
1799 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1800 			    &olen, sizeof(olen));
1801 			bgp_fsm(peer, EVNT_CON_FATAL);
1802 			return (-1);
1803 		}
1804 		break;
1805 	case KEEPALIVE:
1806 		if (*len != MSGSIZE_KEEPALIVE) {
1807 			log_peer_warnx(&peer->conf,
1808 			    "received KEEPALIVE: illegal len: %u byte", *len);
1809 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1810 			    &olen, sizeof(olen));
1811 			bgp_fsm(peer, EVNT_CON_FATAL);
1812 			return (-1);
1813 		}
1814 		break;
1815 	case RREFRESH:
1816 		if (*len != MSGSIZE_RREFRESH) {
1817 			log_peer_warnx(&peer->conf,
1818 			    "received RREFRESH: illegal len: %u byte", *len);
1819 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1820 			    &olen, sizeof(olen));
1821 			bgp_fsm(peer, EVNT_CON_FATAL);
1822 			return (-1);
1823 		}
1824 		break;
1825 	default:
1826 		log_peer_warnx(&peer->conf,
1827 		    "received msg with unknown type %u", *type);
1828 		session_notification(peer, ERR_HEADER, ERR_HDR_TYPE,
1829 		    type, 1);
1830 		bgp_fsm(peer, EVNT_CON_FATAL);
1831 		return (-1);
1832 	}
1833 	LIST_FOREACH(mrt, &mrthead, entry) {
1834 		if (!(mrt->type == MRT_ALL_IN || (*type == UPDATE &&
1835 		    mrt->type == MRT_UPDATE_IN)))
1836 			continue;
1837 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1838 		    mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
1839 		    mrt->group_id == peer->conf.groupid))
1840 			mrt_dump_bgp_msg(mrt, data, *len, peer);
1841 	}
1842 	return (0);
1843 }
1844 
1845 int
1846 parse_open(struct peer *peer)
1847 {
1848 	u_char		*p, *op_val;
1849 	u_int8_t	 version, rversion;
1850 	u_int16_t	 short_as, msglen;
1851 	u_int16_t	 holdtime, oholdtime, myholdtime;
1852 	u_int32_t	 as, bgpid;
1853 	u_int8_t	 optparamlen, plen;
1854 	u_int8_t	 op_type, op_len;
1855 
1856 	p = peer->rbuf->rptr;
1857 	p += MSGSIZE_HEADER_MARKER;
1858 	memcpy(&msglen, p, sizeof(msglen));
1859 	msglen = ntohs(msglen);
1860 
1861 	p = peer->rbuf->rptr;
1862 	p += MSGSIZE_HEADER;	/* header is already checked */
1863 
1864 	memcpy(&version, p, sizeof(version));
1865 	p += sizeof(version);
1866 
1867 	if (version != BGP_VERSION) {
1868 		log_peer_warnx(&peer->conf,
1869 		    "peer wants unrecognized version %u", version);
1870 		if (version > BGP_VERSION)
1871 			rversion = version - BGP_VERSION;
1872 		else
1873 			rversion = BGP_VERSION;
1874 		session_notification(peer, ERR_OPEN, ERR_OPEN_VERSION,
1875 		    &rversion, sizeof(rversion));
1876 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1877 		return (-1);
1878 	}
1879 
1880 	memcpy(&short_as, p, sizeof(short_as));
1881 	p += sizeof(short_as);
1882 	as = peer->short_as = ntohs(short_as);
1883 
1884 	memcpy(&oholdtime, p, sizeof(oholdtime));
1885 	p += sizeof(oholdtime);
1886 
1887 	holdtime = ntohs(oholdtime);
1888 	if (holdtime && holdtime < peer->conf.min_holdtime) {
1889 		log_peer_warnx(&peer->conf,
1890 		    "peer requests unacceptable holdtime %u", holdtime);
1891 		session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME,
1892 		    NULL, 0);
1893 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1894 		return (-1);
1895 	}
1896 
1897 	myholdtime = peer->conf.holdtime;
1898 	if (!myholdtime)
1899 		myholdtime = conf->holdtime;
1900 	if (holdtime < myholdtime)
1901 		peer->holdtime = holdtime;
1902 	else
1903 		peer->holdtime = myholdtime;
1904 
1905 	memcpy(&bgpid, p, sizeof(bgpid));
1906 	p += sizeof(bgpid);
1907 
1908 	/* check bgpid for validity - just disallow 0 */
1909 	if (ntohl(bgpid) == 0) {
1910 		log_peer_warnx(&peer->conf, "peer BGPID %lu unacceptable",
1911 		    ntohl(bgpid));
1912 		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID,
1913 		    NULL, 0);
1914 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1915 		return (-1);
1916 	}
1917 	peer->remote_bgpid = bgpid;
1918 
1919 	memcpy(&optparamlen, p, sizeof(optparamlen));
1920 	p += sizeof(optparamlen);
1921 
1922 	if (optparamlen != msglen - MSGSIZE_OPEN_MIN) {
1923 			log_peer_warnx(&peer->conf,
1924 			    "corrupt OPEN message received: length mismatch");
1925 			session_notification(peer, ERR_OPEN, 0, NULL, 0);
1926 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1927 			return (-1);
1928 	}
1929 
1930 	plen = optparamlen;
1931 	while (plen > 0) {
1932 		if (plen < 2) {
1933 			log_peer_warnx(&peer->conf,
1934 			    "corrupt OPEN message received, len wrong");
1935 			session_notification(peer, ERR_OPEN, 0, NULL, 0);
1936 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1937 			return (-1);
1938 		}
1939 		memcpy(&op_type, p, sizeof(op_type));
1940 		p += sizeof(op_type);
1941 		plen -= sizeof(op_type);
1942 		memcpy(&op_len, p, sizeof(op_len));
1943 		p += sizeof(op_len);
1944 		plen -= sizeof(op_len);
1945 		if (op_len > 0) {
1946 			if (plen < op_len) {
1947 				log_peer_warnx(&peer->conf,
1948 				    "corrupt OPEN message received, len wrong");
1949 				session_notification(peer, ERR_OPEN, 0,
1950 				    NULL, 0);
1951 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1952 				return (-1);
1953 			}
1954 			op_val = p;
1955 			p += op_len;
1956 			plen -= op_len;
1957 		} else
1958 			op_val = NULL;
1959 
1960 		switch (op_type) {
1961 		case OPT_PARAM_CAPABILITIES:		/* RFC 3392 */
1962 			if (parse_capabilities(peer, op_val, op_len,
1963 			    &as) == -1) {
1964 				session_notification(peer, ERR_OPEN, 0,
1965 				    NULL, 0);
1966 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1967 				return (-1);
1968 			}
1969 			break;
1970 		case OPT_PARAM_AUTH:			/* deprecated */
1971 		default:
1972 			/*
1973 			 * unsupported type
1974 			 * the RFCs tell us to leave the data section empty
1975 			 * and notify the peer with ERR_OPEN, ERR_OPEN_OPT.
1976 			 * How the peer should know _which_ optional parameter
1977 			 * we don't support is beyond me.
1978 			 */
1979 			log_peer_warnx(&peer->conf,
1980 			    "received OPEN message with unsupported optional "
1981 			    "parameter: type %u", op_type);
1982 			session_notification(peer, ERR_OPEN, ERR_OPEN_OPT,
1983 				NULL, 0);
1984 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1985 			timer_set(peer, Timer_IdleHold, 0);	/* no punish */
1986 			peer->IdleHoldTime /= 2;
1987 			return (-1);
1988 		}
1989 	}
1990 
1991 	/* if remote-as is zero and it's a cloned neighbor, accept any */
1992 	if (peer->conf.cloned && !peer->conf.remote_as && as != AS_TRANS) {
1993 		peer->conf.remote_as = as;
1994 		peer->conf.ebgp = (peer->conf.remote_as != conf->as);
1995 		if (!peer->conf.ebgp)
1996 			/* force enforce_as off for iBGP sessions */
1997 			peer->conf.enforce_as = ENFORCE_AS_OFF;
1998 	}
1999 
2000 	if (peer->conf.remote_as != as) {
2001 		log_peer_warnx(&peer->conf, "peer sent wrong AS %s",
2002 		    log_as(as));
2003 		session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL, 0);
2004 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2005 		return (-1);
2006 	}
2007 
2008 	if (capa_neg_calc(peer) == -1) {
2009 		log_peer_warnx(&peer->conf,
2010 		    "capability negotiation calculation failed");
2011 		session_notification(peer, ERR_OPEN, 0, NULL, 0);
2012 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2013 		return (-1);
2014 	}
2015 
2016 	return (0);
2017 }
2018 
2019 int
2020 parse_update(struct peer *peer)
2021 {
2022 	u_char		*p;
2023 	u_int16_t	 datalen;
2024 
2025 	/*
2026 	 * we pass the message verbatim to the rde.
2027 	 * in case of errors the whole session is reset with a
2028 	 * notification anyway, we only need to know the peer
2029 	 */
2030 	p = peer->rbuf->rptr;
2031 	p += MSGSIZE_HEADER_MARKER;
2032 	memcpy(&datalen, p, sizeof(datalen));
2033 	datalen = ntohs(datalen);
2034 
2035 	p = peer->rbuf->rptr;
2036 	p += MSGSIZE_HEADER;	/* header is already checked */
2037 	datalen -= MSGSIZE_HEADER;
2038 
2039 	if (imsg_compose(ibuf_rde, IMSG_UPDATE, peer->conf.id, 0, -1, p,
2040 	    datalen) == -1)
2041 		return (-1);
2042 
2043 	return (0);
2044 }
2045 
2046 int
2047 parse_refresh(struct peer *peer)
2048 {
2049 	u_char		*p;
2050 	u_int16_t	 afi;
2051 	u_int8_t	 aid, safi;
2052 
2053 	p = peer->rbuf->rptr;
2054 	p += MSGSIZE_HEADER;	/* header is already checked */
2055 
2056 	/*
2057 	 * We could check if we actually announced the capability but
2058 	 * as long as the message is correctly encoded we don't care.
2059 	 */
2060 
2061 	/* afi, 2 byte */
2062 	memcpy(&afi, p, sizeof(afi));
2063 	afi = ntohs(afi);
2064 	p += 2;
2065 	/* reserved, 1 byte */
2066 	p += 1;
2067 	/* safi, 1 byte */
2068 	memcpy(&safi, p, sizeof(safi));
2069 
2070 	/* afi/safi unchecked -	unrecognized values will be ignored anyway */
2071 	if (afi2aid(afi, safi, &aid) == -1) {
2072 		log_peer_warnx(&peer->conf, "peer sent bad refresh, "
2073 		    "invalid afi/safi pair");
2074 		return (0);
2075 	}
2076 
2077 	if (imsg_compose(ibuf_rde, IMSG_REFRESH, peer->conf.id, 0, -1, &aid,
2078 	    sizeof(aid)) == -1)
2079 		return (-1);
2080 
2081 	return (0);
2082 }
2083 
2084 int
2085 parse_notification(struct peer *peer)
2086 {
2087 	u_char		*p;
2088 	u_int16_t	 datalen;
2089 	u_int8_t	 errcode;
2090 	u_int8_t	 subcode;
2091 	u_int8_t	 capa_code;
2092 	u_int8_t	 capa_len;
2093 	u_int8_t	 i;
2094 
2095 	/* just log */
2096 	p = peer->rbuf->rptr;
2097 	p += MSGSIZE_HEADER_MARKER;
2098 	memcpy(&datalen, p, sizeof(datalen));
2099 	datalen = ntohs(datalen);
2100 
2101 	p = peer->rbuf->rptr;
2102 	p += MSGSIZE_HEADER;	/* header is already checked */
2103 	datalen -= MSGSIZE_HEADER;
2104 
2105 	memcpy(&errcode, p, sizeof(errcode));
2106 	p += sizeof(errcode);
2107 	datalen -= sizeof(errcode);
2108 
2109 	memcpy(&subcode, p, sizeof(subcode));
2110 	p += sizeof(subcode);
2111 	datalen -= sizeof(subcode);
2112 
2113 	log_notification(peer, errcode, subcode, p, datalen, "received");
2114 	peer->errcnt++;
2115 
2116 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_CAPA) {
2117 		if (datalen == 0) {	/* zebra likes to send those.. humbug */
2118 			log_peer_warnx(&peer->conf, "received \"unsupported "
2119 			    "capability\" notification without data part, "
2120 			    "disabling capability announcements altogether");
2121 			session_capa_ann_none(peer);
2122 		}
2123 
2124 		while (datalen > 0) {
2125 			if (datalen < 2) {
2126 				log_peer_warnx(&peer->conf,
2127 				    "parse_notification: "
2128 				    "expect len >= 2, len is %u", datalen);
2129 				return (-1);
2130 			}
2131 			memcpy(&capa_code, p, sizeof(capa_code));
2132 			p += sizeof(capa_code);
2133 			datalen -= sizeof(capa_code);
2134 			memcpy(&capa_len, p, sizeof(capa_len));
2135 			p += sizeof(capa_len);
2136 			datalen -= sizeof(capa_len);
2137 			if (datalen < capa_len) {
2138 				log_peer_warnx(&peer->conf,
2139 				    "parse_notification: capa_len %u exceeds "
2140 				    "remaining msg length %u", capa_len,
2141 				    datalen);
2142 				return (-1);
2143 			}
2144 			p += capa_len;
2145 			datalen -= capa_len;
2146 			switch (capa_code) {
2147 			case CAPA_MP:
2148 				for (i = 0; i < AID_MAX; i++)
2149 					peer->capa.ann.mp[i] = 0;
2150 				log_peer_warnx(&peer->conf,
2151 				    "disabling multiprotocol capability");
2152 				break;
2153 			case CAPA_REFRESH:
2154 				peer->capa.ann.refresh = 0;
2155 				log_peer_warnx(&peer->conf,
2156 				    "disabling route refresh capability");
2157 				break;
2158 			case CAPA_RESTART:
2159 				peer->capa.ann.restart = 0;
2160 				log_peer_warnx(&peer->conf,
2161 				    "disabling restart capability");
2162 				break;
2163 			case CAPA_AS4BYTE:
2164 				peer->capa.ann.as4byte = 0;
2165 				log_peer_warnx(&peer->conf,
2166 				    "disabling 4-byte AS num capability");
2167 				break;
2168 			default:	/* should not happen... */
2169 				log_peer_warnx(&peer->conf, "received "
2170 				    "\"unsupported capability\" notification "
2171 				    "for unknown capability %u, disabling "
2172 				    "capability announcements altogether",
2173 				    capa_code);
2174 				session_capa_ann_none(peer);
2175 				break;
2176 			}
2177 		}
2178 
2179 		return (1);
2180 	}
2181 
2182 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_OPT) {
2183 		session_capa_ann_none(peer);
2184 		return (1);
2185 	}
2186 
2187 	return (0);
2188 }
2189 
2190 int
2191 parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as)
2192 {
2193 	u_char		*capa_val;
2194 	u_int32_t	 remote_as;
2195 	u_int16_t	 len;
2196 	u_int16_t	 afi;
2197 	u_int8_t	 safi;
2198 	u_int8_t	 aid;
2199 	u_int8_t	 capa_code;
2200 	u_int8_t	 capa_len;
2201 
2202 	len = dlen;
2203 	while (len > 0) {
2204 		if (len < 2) {
2205 			log_peer_warnx(&peer->conf, "parse_capabilities: "
2206 			    "expect len >= 2, len is %u", len);
2207 			return (-1);
2208 		}
2209 		memcpy(&capa_code, d, sizeof(capa_code));
2210 		d += sizeof(capa_code);
2211 		len -= sizeof(capa_code);
2212 		memcpy(&capa_len, d, sizeof(capa_len));
2213 		d += sizeof(capa_len);
2214 		len -= sizeof(capa_len);
2215 		if (capa_len > 0) {
2216 			if (len < capa_len) {
2217 				log_peer_warnx(&peer->conf,
2218 				    "parse_capabilities: "
2219 				    "len %u smaller than capa_len %u",
2220 				    len, capa_len);
2221 				return (-1);
2222 			}
2223 			capa_val = d;
2224 			d += capa_len;
2225 			len -= capa_len;
2226 		} else
2227 			capa_val = NULL;
2228 
2229 		switch (capa_code) {
2230 		case CAPA_MP:			/* RFC 4760 */
2231 			if (capa_len != 4) {
2232 				log_peer_warnx(&peer->conf,
2233 				    "parse_capabilities: "
2234 				    "expect len 4, len is %u", capa_len);
2235 				return (-1);
2236 			}
2237 			memcpy(&afi, capa_val, sizeof(afi));
2238 			afi = ntohs(afi);
2239 			memcpy(&safi, capa_val + 3, sizeof(safi));
2240 			if (afi2aid(afi, safi, &aid) == -1) {
2241 				log_peer_warnx(&peer->conf,
2242 				    "parse_capabilities: AFI %u, "
2243 				    "safi %u unknown", afi, safi);
2244 				break;
2245 			}
2246 			peer->capa.peer.mp[aid] = 1;
2247 			break;
2248 		case CAPA_REFRESH:
2249 			peer->capa.peer.refresh = 1;
2250 			break;
2251 		case CAPA_RESTART:
2252 			peer->capa.peer.restart = 1;
2253 			/* we don't care about the further restart capas yet */
2254 			break;
2255 		case CAPA_AS4BYTE:
2256 			if (capa_len != 4) {
2257 				log_peer_warnx(&peer->conf,
2258 				    "parse_capabilities: "
2259 				    "expect len 4, len is %u", capa_len);
2260 				return (-1);
2261 			}
2262 			memcpy(&remote_as, capa_val, sizeof(remote_as));
2263 			*as = ntohl(remote_as);
2264 			peer->capa.peer.as4byte = 1;
2265 			break;
2266 		default:
2267 			break;
2268 		}
2269 	}
2270 
2271 	return (0);
2272 }
2273 
2274 int
2275 capa_neg_calc(struct peer *p)
2276 {
2277 	u_int8_t	i, hasmp = 0;
2278 
2279 	/* refresh: does not realy matter here, use peer setting */
2280 	p->capa.neg.refresh = p->capa.peer.refresh;
2281 
2282 	/* as4byte: both side must announce capability */
2283 	if (p->capa.ann.as4byte && p->capa.peer.as4byte)
2284 		p->capa.neg.as4byte = 1;
2285 	else
2286 		p->capa.neg.as4byte = 0;
2287 
2288 	/* MP: both side must announce capability */
2289 	for (i = 0; i < AID_MAX; i++) {
2290 		if (p->capa.ann.mp[i] && p->capa.peer.mp[i]) {
2291 			p->capa.neg.mp[i] = 1;
2292 			hasmp = 1;
2293 		} else
2294 			p->capa.neg.mp[i] = 0;
2295 	}
2296 	/* if no MP capability present for default IPv4 unicast mode */
2297 	if (!hasmp)
2298 		p->capa.neg.mp[AID_INET] = 1;
2299 
2300 	p->capa.neg.restart = p->capa.peer.restart;
2301 
2302 	return (0);
2303 }
2304 
2305 void
2306 session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt)
2307 {
2308 	struct imsg		 imsg;
2309 	struct mrt		 xmrt;
2310 	struct mrt		*mrt;
2311 	struct peer_config	*pconf;
2312 	struct peer		*p, *next;
2313 	struct listen_addr	*la, *nla;
2314 	struct kif		*kif;
2315 	u_char			*data;
2316 	enum reconf_action	 reconf;
2317 	int			 n, depend_ok, restricted;
2318 	u_int8_t		 errcode, subcode;
2319 
2320 	if ((n = imsg_read(ibuf)) == -1)
2321 		fatal("session_dispatch_imsg: imsg_read error");
2322 
2323 	if (n == 0)	/* connection closed */
2324 		fatalx("session_dispatch_imsg: pipe closed");
2325 
2326 	for (;;) {
2327 		if ((n = imsg_get(ibuf, &imsg)) == -1)
2328 			fatal("session_dispatch_imsg: imsg_get error");
2329 
2330 		if (n == 0)
2331 			break;
2332 
2333 		switch (imsg.hdr.type) {
2334 		case IMSG_RECONF_CONF:
2335 			if (idx != PFD_PIPE_MAIN)
2336 				fatalx("reconf request not from parent");
2337 			if ((nconf = malloc(sizeof(struct bgpd_config))) ==
2338 			    NULL)
2339 				fatal(NULL);
2340 			memcpy(nconf, imsg.data, sizeof(struct bgpd_config));
2341 			if ((nconf->listen_addrs = calloc(1,
2342 			    sizeof(struct listen_addrs))) == NULL)
2343 				fatal(NULL);
2344 			TAILQ_INIT(nconf->listen_addrs);
2345 			npeers = NULL;
2346 			init_conf(nconf);
2347 			pending_reconf = 1;
2348 			break;
2349 		case IMSG_RECONF_PEER:
2350 			if (idx != PFD_PIPE_MAIN)
2351 				fatalx("reconf request not from parent");
2352 			pconf = imsg.data;
2353 			p = getpeerbyaddr(&pconf->remote_addr);
2354 			if (p == NULL) {
2355 				if ((p = calloc(1, sizeof(struct peer))) ==
2356 				    NULL)
2357 					fatal("new_peer");
2358 				p->state = p->prev_state = STATE_NONE;
2359 				p->next = npeers;
2360 				npeers = p;
2361 				reconf = RECONF_REINIT;
2362 			} else
2363 				reconf = RECONF_KEEP;
2364 
2365 			memcpy(&p->conf, pconf, sizeof(struct peer_config));
2366 			p->conf.reconf_action = reconf;
2367 			break;
2368 		case IMSG_RECONF_LISTENER:
2369 			if (idx != PFD_PIPE_MAIN)
2370 				fatalx("reconf request not from parent");
2371 			if (nconf == NULL)
2372 				fatalx("IMSG_RECONF_LISTENER but no config");
2373 			nla = imsg.data;
2374 			TAILQ_FOREACH(la, conf->listen_addrs, entry)
2375 				if (!la_cmp(la, nla))
2376 					break;
2377 
2378 			if (la == NULL) {
2379 				if (nla->reconf != RECONF_REINIT)
2380 					fatalx("king bula sez: "
2381 					    "expected REINIT");
2382 
2383 				if ((nla->fd = imsg.fd) == -1)
2384 					log_warnx("expected to receive fd for "
2385 					    "%s but didn't receive any",
2386 					    log_sockaddr((struct sockaddr *)
2387 					    &nla->sa));
2388 
2389 				la = calloc(1, sizeof(struct listen_addr));
2390 				if (la == NULL)
2391 					fatal(NULL);
2392 				memcpy(&la->sa, &nla->sa, sizeof(la->sa));
2393 				la->flags = nla->flags;
2394 				la->fd = nla->fd;
2395 				la->reconf = RECONF_REINIT;
2396 				TAILQ_INSERT_TAIL(nconf->listen_addrs, la,
2397 				    entry);
2398 			} else {
2399 				if (nla->reconf != RECONF_KEEP)
2400 					fatalx("king bula sez: expected KEEP");
2401 				la->reconf = RECONF_KEEP;
2402 			}
2403 
2404 			break;
2405 		case IMSG_RECONF_CTRL:
2406 			if (idx != PFD_PIPE_MAIN)
2407 				fatalx("reconf request not from parent");
2408 			if (imsg.hdr.len != IMSG_HEADER_SIZE +
2409 			    sizeof(restricted))
2410 				fatalx("IFINFO imsg with wrong len");
2411 			memcpy(&restricted, imsg.data, sizeof(restricted));
2412 			if (imsg.fd == -1) {
2413 				log_warnx("expected to receive fd for control "
2414 				    "socket but didn't receive any");
2415 				break;
2416 			}
2417 			if (restricted) {
2418 				control_shutdown(rcsock);
2419 				rcsock = imsg.fd;
2420 				control_listen(rcsock);
2421 			} else {
2422 				control_shutdown(csock);
2423 				csock = imsg.fd;
2424 				control_listen(csock);
2425 			}
2426 			break;
2427 		case IMSG_RECONF_DONE:
2428 			if (idx != PFD_PIPE_MAIN)
2429 				fatalx("reconf request not from parent");
2430 			if (nconf == NULL)
2431 				fatalx("got IMSG_RECONF_DONE but no config");
2432 			conf->flags = nconf->flags;
2433 			conf->log = nconf->log;
2434 			conf->bgpid = nconf->bgpid;
2435 			conf->clusterid = nconf->clusterid;
2436 			conf->as = nconf->as;
2437 			conf->short_as = nconf->short_as;
2438 			conf->holdtime = nconf->holdtime;
2439 			conf->min_holdtime = nconf->min_holdtime;
2440 			conf->connectretry = nconf->connectretry;
2441 
2442 			/* add new peers */
2443 			for (p = npeers; p != NULL; p = next) {
2444 				next = p->next;
2445 				p->next = peers;
2446 				peers = p;
2447 			}
2448 			/* find ones that need attention */
2449 			for (p = peers; p != NULL; p = p->next) {
2450 				/* needs to be deleted? */
2451 				if (p->conf.reconf_action == RECONF_NONE &&
2452 				    !p->conf.cloned)
2453 					p->conf.reconf_action = RECONF_DELETE;
2454 				/* had demotion, is demoted, demote removed? */
2455 				if (p->demoted && !p->conf.demote_group[0])
2456 						session_demote(p, -1);
2457 			}
2458 
2459 			/* delete old listeners */
2460 			for (la = TAILQ_FIRST(conf->listen_addrs); la != NULL;
2461 			    la = nla) {
2462 				nla = TAILQ_NEXT(la, entry);
2463 				if (la->reconf == RECONF_NONE) {
2464 					log_info("not listening on %s any more",
2465 					    log_sockaddr(
2466 					    (struct sockaddr *)&la->sa));
2467 					TAILQ_REMOVE(conf->listen_addrs, la,
2468 					    entry);
2469 					close(la->fd);
2470 					free(la);
2471 				}
2472 			}
2473 
2474 			/* add new listeners */
2475 			while ((la = TAILQ_FIRST(nconf->listen_addrs)) !=
2476 			    NULL) {
2477 				TAILQ_REMOVE(nconf->listen_addrs, la, entry);
2478 				TAILQ_INSERT_TAIL(conf->listen_addrs, la,
2479 				    entry);
2480 			}
2481 
2482 			setup_listeners(listener_cnt);
2483 			free(nconf->listen_addrs);
2484 			free(nconf);
2485 			nconf = NULL;
2486 			pending_reconf = 0;
2487 			log_info("SE reconfigured");
2488 			break;
2489 		case IMSG_IFINFO:
2490 			if (idx != PFD_PIPE_MAIN)
2491 				fatalx("IFINFO message not from parent");
2492 			if (imsg.hdr.len != IMSG_HEADER_SIZE +
2493 			    sizeof(struct kif))
2494 				fatalx("IFINFO imsg with wrong len");
2495 			kif = imsg.data;
2496 			depend_ok = (kif->flags & IFF_UP) &&
2497 			    LINK_STATE_IS_UP(kif->link_state);
2498 
2499 			for (p = peers; p != NULL; p = p->next)
2500 				if (!strcmp(p->conf.if_depend, kif->ifname)) {
2501 					if (depend_ok && !p->depend_ok) {
2502 						p->depend_ok = depend_ok;
2503 						bgp_fsm(p, EVNT_START);
2504 					} else if (!depend_ok && p->depend_ok) {
2505 						p->depend_ok = depend_ok;
2506 						session_stop(p,
2507 						    ERR_CEASE_OTHER_CHANGE);
2508 					}
2509 				}
2510 			break;
2511 		case IMSG_MRT_OPEN:
2512 		case IMSG_MRT_REOPEN:
2513 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2514 			    sizeof(struct mrt)) {
2515 				log_warnx("wrong imsg len");
2516 				break;
2517 			}
2518 
2519 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
2520 			if ((xmrt.wbuf.fd = imsg.fd) == -1)
2521 				log_warnx("expected to receive fd for mrt dump "
2522 				    "but didn't receive any");
2523 
2524 			mrt = mrt_get(&mrthead, &xmrt);
2525 			if (mrt == NULL) {
2526 				/* new dump */
2527 				mrt = calloc(1, sizeof(struct mrt));
2528 				if (mrt == NULL)
2529 					fatal("session_dispatch_imsg");
2530 				memcpy(mrt, &xmrt, sizeof(struct mrt));
2531 				TAILQ_INIT(&mrt->wbuf.bufs);
2532 				LIST_INSERT_HEAD(&mrthead, mrt, entry);
2533 			} else {
2534 				/* old dump reopened */
2535 				close(mrt->wbuf.fd);
2536 				mrt->wbuf.fd = xmrt.wbuf.fd;
2537 			}
2538 			break;
2539 		case IMSG_MRT_CLOSE:
2540 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2541 			    sizeof(struct mrt)) {
2542 				log_warnx("wrong imsg len");
2543 				break;
2544 			}
2545 
2546 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
2547 			mrt = mrt_get(&mrthead, &xmrt);
2548 			if (mrt != NULL) {
2549 				mrt_clean(mrt);
2550 				LIST_REMOVE(mrt, entry);
2551 				free(mrt);
2552 			}
2553 			break;
2554 		case IMSG_CTL_KROUTE:
2555 		case IMSG_CTL_KROUTE_ADDR:
2556 		case IMSG_CTL_SHOW_NEXTHOP:
2557 		case IMSG_CTL_SHOW_INTERFACE:
2558 		case IMSG_CTL_SHOW_FIB_TABLES:
2559 			if (idx != PFD_PIPE_MAIN)
2560 				fatalx("ctl kroute request not from parent");
2561 			control_imsg_relay(&imsg);
2562 			break;
2563 		case IMSG_CTL_SHOW_RIB:
2564 		case IMSG_CTL_SHOW_RIB_PREFIX:
2565 		case IMSG_CTL_SHOW_RIB_ATTR:
2566 		case IMSG_CTL_SHOW_RIB_MEM:
2567 		case IMSG_CTL_SHOW_NETWORK:
2568 		case IMSG_CTL_SHOW_NEIGHBOR:
2569 			if (idx != PFD_PIPE_ROUTE_CTL)
2570 				fatalx("ctl rib request not from RDE");
2571 			control_imsg_relay(&imsg);
2572 			break;
2573 		case IMSG_CTL_END:
2574 		case IMSG_CTL_RESULT:
2575 			control_imsg_relay(&imsg);
2576 			break;
2577 		case IMSG_UPDATE:
2578 			if (idx != PFD_PIPE_ROUTE)
2579 				fatalx("update request not from RDE");
2580 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2581 			    MAX_PKTSIZE - MSGSIZE_HEADER ||
2582 			    imsg.hdr.len < IMSG_HEADER_SIZE +
2583 			    MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER)
2584 				log_warnx("RDE sent invalid update");
2585 			else
2586 				session_update(imsg.hdr.peerid, imsg.data,
2587 				    imsg.hdr.len - IMSG_HEADER_SIZE);
2588 			break;
2589 		case IMSG_UPDATE_ERR:
2590 			if (idx != PFD_PIPE_ROUTE)
2591 				fatalx("update request not from RDE");
2592 			if (imsg.hdr.len < IMSG_HEADER_SIZE + 2) {
2593 				log_warnx("RDE sent invalid notification");
2594 				break;
2595 			}
2596 			if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) {
2597 				log_warnx("no such peer: id=%u",
2598 				    imsg.hdr.peerid);
2599 				break;
2600 			}
2601 			data = imsg.data;
2602 			errcode = *data++;
2603 			subcode = *data++;
2604 
2605 			if (imsg.hdr.len == IMSG_HEADER_SIZE + 2)
2606 				data = NULL;
2607 
2608 			session_notification(p, errcode, subcode,
2609 			    data, imsg.hdr.len - IMSG_HEADER_SIZE - 2);
2610 			switch (errcode) {
2611 			case ERR_CEASE:
2612 				switch (subcode) {
2613 				case ERR_CEASE_MAX_PREFIX:
2614 					bgp_fsm(p, EVNT_STOP);
2615 					if (p->conf.max_prefix_restart)
2616 						timer_set(p, Timer_IdleHold, 60 *
2617 						    p->conf.max_prefix_restart);
2618 					break;
2619 				default:
2620 					bgp_fsm(p, EVNT_CON_FATAL);
2621 					break;
2622 				}
2623 				break;
2624 			default:
2625 				bgp_fsm(p, EVNT_CON_FATAL);
2626 				break;
2627 			}
2628 			break;
2629 		default:
2630 			break;
2631 		}
2632 		imsg_free(&imsg);
2633 	}
2634 }
2635 
2636 int
2637 la_cmp(struct listen_addr *a, struct listen_addr *b)
2638 {
2639 	struct sockaddr_in	*in_a, *in_b;
2640 	struct sockaddr_in6	*in6_a, *in6_b;
2641 
2642 	if (a->sa.ss_family != b->sa.ss_family)
2643 		return (1);
2644 
2645 	switch (a->sa.ss_family) {
2646 	case AF_INET:
2647 		in_a = (struct sockaddr_in *)&a->sa;
2648 		in_b = (struct sockaddr_in *)&b->sa;
2649 		if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr)
2650 			return (1);
2651 		if (in_a->sin_port != in_b->sin_port)
2652 			return (1);
2653 		break;
2654 	case AF_INET6:
2655 		in6_a = (struct sockaddr_in6 *)&a->sa;
2656 		in6_b = (struct sockaddr_in6 *)&b->sa;
2657 		if (bcmp(&in6_a->sin6_addr, &in6_b->sin6_addr,
2658 		    sizeof(struct in6_addr)))
2659 			return (1);
2660 		if (in6_a->sin6_port != in6_b->sin6_port)
2661 			return (1);
2662 		break;
2663 	default:
2664 		fatal("king bula sez: unknown address family");
2665 		/* NOTREACHED */
2666 	}
2667 
2668 	return (0);
2669 }
2670 
2671 struct peer *
2672 getpeerbyaddr(struct bgpd_addr *addr)
2673 {
2674 	struct peer *p;
2675 
2676 	/* we might want a more effective way to find peers by IP */
2677 	for (p = peers; p != NULL &&
2678 	    memcmp(&p->conf.remote_addr, addr, sizeof(p->conf.remote_addr));
2679 	    p = p->next)
2680 		;	/* nothing */
2681 
2682 	return (p);
2683 }
2684 
2685 struct peer *
2686 getpeerbydesc(const char *descr)
2687 {
2688 	struct peer	*p, *res = NULL;
2689 	int		 match = 0;
2690 
2691 	for (p = peers; p != NULL; p = p->next)
2692 		if (!strcmp(p->conf.descr, descr)) {
2693 			res = p;
2694 			match++;
2695 		}
2696 
2697 	if (match > 1)
2698 		log_info("neighbor description \"%s\" not unique, request "
2699 		    "aborted", descr);
2700 
2701 	if (match == 1)
2702 		return (res);
2703 	else
2704 		return (NULL);
2705 }
2706 
2707 struct peer *
2708 getpeerbyip(struct sockaddr *ip)
2709 {
2710 	struct bgpd_addr addr;
2711 	struct peer	*p, *newpeer, *loose = NULL;
2712 	u_int32_t	 id;
2713 
2714 	sa2addr(ip, &addr);
2715 
2716 	/* we might want a more effective way to find peers by IP */
2717 	for (p = peers; p != NULL; p = p->next)
2718 		if (!p->conf.template &&
2719 		    !memcmp(&addr, &p->conf.remote_addr, sizeof(addr)))
2720 			return (p);
2721 
2722 	/* try template matching */
2723 	for (p = peers; p != NULL; p = p->next)
2724 		if (p->conf.template &&
2725 		    p->conf.remote_addr.aid == addr.aid &&
2726 		    session_match_mask(p, &addr))
2727 			if (loose == NULL || loose->conf.remote_masklen <
2728 			    p->conf.remote_masklen)
2729 				loose = p;
2730 
2731 	if (loose != NULL) {
2732 		/* clone */
2733 		if ((newpeer = malloc(sizeof(struct peer))) == NULL)
2734 			fatal(NULL);
2735 		memcpy(newpeer, loose, sizeof(struct peer));
2736 		for (id = UINT_MAX; id > UINT_MAX / 2; id--) {
2737 			for (p = peers; p != NULL && p->conf.id != id;
2738 			    p = p->next)
2739 				;	/* nothing */
2740 			if (p == NULL) {	/* we found a free id */
2741 				newpeer->conf.id = id;
2742 				break;
2743 			}
2744 		}
2745 		sa2addr(ip, &newpeer->conf.remote_addr);
2746 		switch (ip->sa_family) {
2747 		case AF_INET:
2748 			newpeer->conf.remote_masklen = 32;
2749 			break;
2750 		case AF_INET6:
2751 			newpeer->conf.remote_masklen = 128;
2752 			break;
2753 		}
2754 		newpeer->conf.template = 0;
2755 		newpeer->conf.cloned = 1;
2756 		newpeer->state = newpeer->prev_state = STATE_NONE;
2757 		newpeer->conf.reconf_action = RECONF_KEEP;
2758 		newpeer->rbuf = NULL;
2759 		init_peer(newpeer);
2760 		bgp_fsm(newpeer, EVNT_START);
2761 		newpeer->next = peers;
2762 		peers = newpeer;
2763 		return (newpeer);
2764 	}
2765 
2766 	return (NULL);
2767 }
2768 
2769 int
2770 session_match_mask(struct peer *p, struct bgpd_addr *a)
2771 {
2772 	in_addr_t	 v4mask;
2773 	struct in6_addr	 masked;
2774 
2775 	switch (p->conf.remote_addr.aid) {
2776 	case AID_INET:
2777 		v4mask = htonl(prefixlen2mask(p->conf.remote_masklen));
2778 		if (p->conf.remote_addr.v4.s_addr == (a->v4.s_addr & v4mask))
2779 			return (1);
2780 		return (0);
2781 	case AID_INET6:
2782 		inet6applymask(&masked, &a->v6, p->conf.remote_masklen);
2783 
2784 		if (!memcmp(&masked, &p->conf.remote_addr.v6, sizeof(masked)))
2785 			return (1);
2786 		return (0);
2787 	}
2788 	return (0);
2789 }
2790 
2791 struct peer *
2792 getpeerbyid(u_int32_t peerid)
2793 {
2794 	struct peer *p;
2795 
2796 	/* we might want a more effective way to find peers by IP */
2797 	for (p = peers; p != NULL &&
2798 	    p->conf.id != peerid; p = p->next)
2799 		;	/* nothing */
2800 
2801 	return (p);
2802 }
2803 
2804 void
2805 session_down(struct peer *peer)
2806 {
2807 	bzero(&peer->capa.neg, sizeof(peer->capa.neg));
2808 	peer->stats.last_updown = time(NULL);
2809 	if (imsg_compose(ibuf_rde, IMSG_SESSION_DOWN, peer->conf.id, 0, -1,
2810 	    NULL, 0) == -1)
2811 		fatalx("imsg_compose error");
2812 }
2813 
2814 void
2815 session_up(struct peer *p)
2816 {
2817 	struct session_up	 sup;
2818 
2819 	if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, p->conf.id, 0, -1,
2820 	    &p->conf, sizeof(p->conf)) == -1)
2821 		fatalx("imsg_compose error");
2822 
2823 	sa2addr((struct sockaddr *)&p->sa_local, &sup.local_addr);
2824 	sa2addr((struct sockaddr *)&p->sa_remote, &sup.remote_addr);
2825 
2826 	sup.remote_bgpid = p->remote_bgpid;
2827 	sup.short_as = p->short_as;
2828 	memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa));
2829 	p->stats.last_updown = time(NULL);
2830 	if (imsg_compose(ibuf_rde, IMSG_SESSION_UP, p->conf.id, 0, -1,
2831 	    &sup, sizeof(sup)) == -1)
2832 		fatalx("imsg_compose error");
2833 }
2834 
2835 int
2836 imsg_compose_parent(int type, u_int32_t peerid, pid_t pid, void *data,
2837     u_int16_t datalen)
2838 {
2839 	return (imsg_compose(ibuf_main, type, peerid, pid, -1, data, datalen));
2840 }
2841 
2842 int
2843 imsg_compose_rde(int type, pid_t pid, void *data, u_int16_t datalen)
2844 {
2845 	return (imsg_compose(ibuf_rde, type, 0, pid, -1, data, datalen));
2846 }
2847 
2848 void
2849 session_demote(struct peer *p, int level)
2850 {
2851 	struct demote_msg	msg;
2852 
2853 	strlcpy(msg.demote_group, p->conf.demote_group,
2854 	    sizeof(msg.demote_group));
2855 	msg.level = level;
2856 	if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1,
2857 	    &msg, sizeof(msg)) == -1)
2858 		fatalx("imsg_compose error");
2859 
2860 	p->demoted += level;
2861 }
2862 
2863 void
2864 session_stop(struct peer *peer, u_int8_t subcode)
2865 {
2866 	switch (peer->state) {
2867 	case STATE_OPENSENT:
2868 	case STATE_OPENCONFIRM:
2869 	case STATE_ESTABLISHED:
2870 		session_notification(peer, ERR_CEASE, subcode, NULL, 0);
2871 		break;
2872 	default:
2873 		/* session not open, no need to send notification */
2874 		break;
2875 	}
2876 	bgp_fsm(peer, EVNT_STOP);
2877 }
2878