xref: /openbsd-src/usr.sbin/bgpd/session.c (revision d13be5d47e4149db2549a9828e244d59dbc43f15)
1 /*	$OpenBSD: session.c,v 1.319 2011/07/09 02:51:18 henning Exp $ */
2 
3 /*
4  * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/types.h>
21 
22 #include <sys/mman.h>
23 #include <sys/socket.h>
24 #include <sys/time.h>
25 #include <sys/resource.h>
26 #include <sys/un.h>
27 #include <net/if_types.h>
28 #include <netinet/in.h>
29 #include <netinet/in_systm.h>
30 #include <netinet/ip.h>
31 #include <netinet/tcp.h>
32 #include <arpa/inet.h>
33 
34 #include <err.h>
35 #include <errno.h>
36 #include <fcntl.h>
37 #include <poll.h>
38 #include <pwd.h>
39 #include <signal.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <unistd.h>
44 
45 #include "bgpd.h"
46 #include "mrt.h"
47 #include "session.h"
48 
/*
 * Fixed slots at the start of the pollfd array built in session_main().
 * Listener sockets start at PFD_LISTENERS_START; peer, mrt and control
 * connection descriptors follow them.
 */
49 #define PFD_PIPE_MAIN		0
50 #define PFD_PIPE_ROUTE		1
51 #define PFD_PIPE_ROUTE_CTL	2
52 #define PFD_SOCK_CTL		3
53 #define PFD_SOCK_RCTL		4
54 #define PFD_SOCK_PFKEY		5
55 #define PFD_LISTENERS_START	6
56 
/* forward declarations of the functions defined in this file */
57 void	session_sighdlr(int);
58 int	setup_listeners(u_int *);
59 void	init_conf(struct bgpd_config *);
60 void	init_peer(struct peer *);
61 void	start_timer_holdtime(struct peer *);
62 void	start_timer_keepalive(struct peer *);
63 void	session_close_connection(struct peer *);
64 void	change_state(struct peer *, enum session_state, enum session_events);
65 int	session_setup_socket(struct peer *);
66 void	session_accept(int);
67 int	session_connect(struct peer *);
68 void	session_tcp_established(struct peer *);
69 void	session_capa_ann_none(struct peer *);
70 int	session_capa_add(struct ibuf *, u_int8_t, u_int8_t);
71 int	session_capa_add_mp(struct ibuf *, u_int8_t);
72 struct bgp_msg	*session_newmsg(enum msg_type, u_int16_t);
73 int	session_sendmsg(struct bgp_msg *, struct peer *);
74 void	session_open(struct peer *);
75 void	session_keepalive(struct peer *);
76 void	session_update(u_int32_t, void *, size_t);
77 void	session_notification(struct peer *, u_int8_t, u_int8_t, void *,
78 	    ssize_t);
79 void	session_rrefresh(struct peer *, u_int8_t);
80 int	session_dispatch_msg(struct pollfd *, struct peer *);
81 int	parse_header(struct peer *, u_char *, u_int16_t *, u_int8_t *);
82 int	parse_open(struct peer *);
83 int	parse_update(struct peer *);
84 int	parse_refresh(struct peer *);
85 int	parse_notification(struct peer *);
86 int	parse_capabilities(struct peer *, u_char *, u_int16_t, u_int32_t *);
87 int	capa_neg_calc(struct peer *);
88 void	session_dispatch_imsg(struct imsgbuf *, int, u_int *);
89 void	session_up(struct peer *);
90 void	session_down(struct peer *);
91 void	session_demote(struct peer *, int);
92 
93 int		 la_cmp(struct listen_addr *, struct listen_addr *);
94 struct peer	*getpeerbyip(struct sockaddr *);
95 int		 session_match_mask(struct peer *, struct bgpd_addr *);
96 struct peer	*getpeerbyid(u_int32_t);
97 
/*
 * Process-wide state of the session engine.
 * NOTE(review): nconf and npeers look like staging copies used while a
 * reload is pending -- confirm against the imsg dispatch code.
 */
98 struct bgpd_config	*conf, *nconf;
99 struct bgpd_sysdep	 sysdep;
/* peers: head of the singly linked peer list (chained through ->next) */
100 struct peer		*peers, *npeers;
/* set by session_sighdlr(); ends the main loop in session_main() */
101 volatile sig_atomic_t	 session_quit;
/* while non-zero, session_main() skips peer init/reinit/delete handling */
102 int			 pending_reconf;
/* control sockets polled in the PFD_SOCK_CTL / PFD_SOCK_RCTL slots */
103 int			 csock = -1, rcsock = -1;
/* number of initialised peers; bumped in init_peer(), dropped on delete */
104 u_int			 peer_cnt;
/* imsg channels, allocated and initialised in session_main() */
105 struct imsgbuf		*ibuf_rde;
106 struct imsgbuf		*ibuf_rde_ctl;
107 struct imsgbuf		*ibuf_main;
108 
/* list of mrt dump descriptors serviced by the main loop */
109 struct mrt_head		 mrthead;
110 
111 void
112 session_sighdlr(int sig)
113 {
114 	switch (sig) {
115 	case SIGINT:
116 	case SIGTERM:
117 		session_quit = 1;
118 		break;
119 	}
120 }
121 
122 int
123 setup_listeners(u_int *la_cnt)
124 {
125 	int			 ttl = 255;
126 	int			 opt;
127 	struct listen_addr	*la;
128 	u_int			 cnt = 0;
129 
130 	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
131 		la->reconf = RECONF_NONE;
132 		cnt++;
133 
134 		if (la->flags & LISTENER_LISTENING)
135 			continue;
136 
137 		if (la->fd == -1) {
138 			log_warn("cannot establish listener on %s: invalid fd",
139 			    log_sockaddr((struct sockaddr *)&la->sa));
140 			continue;
141 		}
142 
143 		opt = 1;
144 		if (setsockopt(la->fd, IPPROTO_TCP, TCP_MD5SIG,
145 		    &opt, sizeof(opt)) == -1) {
146 			if (errno == ENOPROTOOPT) {	/* system w/o md5sig */
147 				log_warnx("md5sig not available, disabling");
148 				sysdep.no_md5sig = 1;
149 			} else
150 				fatal("setsockopt TCP_MD5SIG");
151 		}
152 
153 		/* set ttl to 255 so that ttl-security works */
154 		if (la->sa.ss_family == AF_INET && setsockopt(la->fd,
155 		    IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) {
156 			log_warn("setup_listeners setsockopt TTL");
157 			continue;
158 		}
159 
160 		session_socket_blockmode(la->fd, BM_NONBLOCK);
161 
162 		if (listen(la->fd, MAX_BACKLOG)) {
163 			close(la->fd);
164 			fatal("listen");
165 		}
166 
167 		la->flags |= LISTENER_LISTENING;
168 
169 		log_info("listening on %s",
170 		    log_sockaddr((struct sockaddr *)&la->sa));
171 	}
172 
173 	*la_cnt = cnt;
174 
175 	return (0);
176 }
177 
/*
 * Entry point of the session engine process.
 *
 * Forks; the parent gets the child's pid back right away.  The child
 * chroots into the home directory of BGPD_USER, drops privileges, sets up
 * its imsg channels on the pipe ends it keeps and then runs the poll(2)
 * loop below until session_quit is raised.  On the way out it stops all
 * peers, releases its resources and calls _exit(0), so the child never
 * returns from this function.
 *
 * pipe_m2s:    end [1] is kept and becomes ibuf_main, end [0] is closed.
 * pipe_s2r:    end [0] is kept and becomes ibuf_rde, end [1] is closed.
 * pipe_m2r:    not used by this process; both ends are closed.
 * pipe_s2rctl: end [0] is kept and becomes ibuf_rde_ctl, end [1] is closed.
 */
178 pid_t
179 session_main(int pipe_m2s[2], int pipe_s2r[2], int pipe_m2r[2],
180     int pipe_s2rctl[2])
181 {
182 	int			 nfds, timeout, pfkeysock;
183 	unsigned int		 i, j, idx_peers, idx_listeners, idx_mrts;
184 	pid_t			 pid;
185 	u_int			 pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0;
186 	u_int			 listener_cnt, ctl_cnt, mrt_cnt;
187 	u_int			 new_cnt;
188 	u_int32_t		 ctl_queued;
189 	struct passwd		*pw;
190 	struct peer		*p, **peer_l = NULL, *last, *next;
191 	struct mrt		*m, *xm, **mrt_l = NULL;
192 	struct pollfd		*pfd = NULL;
193 	struct ctl_conn		*ctl_conn;
194 	struct listen_addr	*la;
195 	void			*newp;
196 	short			 events;
197 
	/* only the child continues past this switch */
198 	switch (pid = fork()) {
199 	case -1:
200 		fatal("cannot fork");
201 	case 0:
202 		break;
203 	default:
204 		return (pid);
205 	}
206 
207 	if ((pw = getpwnam(BGPD_USER)) == NULL)
208 		fatal(NULL);
209 
210 	if (chroot(pw->pw_dir) == -1)
211 		fatal("chroot");
212 	if (chdir("/") == -1)
213 		fatal("chdir(\"/\")");
214 
215 	setproctitle("session engine");
216 	bgpd_process = PROC_SE;
	/*
	 * NOTE(review): pfkey_init() runs before privileges are dropped,
	 * presumably because opening the socket needs them -- confirm.
	 */
217 	pfkeysock = pfkey_init(&sysdep);
218 
219 	if (setgroups(1, &pw->pw_gid) ||
220 	    setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) ||
221 	    setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
222 		fatal("can't drop privileges");
223 
	/* SIGTERM/SIGINT request shutdown, everything else is ignored */
224 	signal(SIGTERM, session_sighdlr);
225 	signal(SIGINT, session_sighdlr);
226 	signal(SIGPIPE, SIG_IGN);
227 	signal(SIGHUP, SIG_IGN);
228 	signal(SIGALRM, SIG_IGN);
229 	signal(SIGUSR1, SIG_IGN);
230 
	/* close the pipe ends this process does not use */
231 	close(pipe_m2s[0]);
232 	close(pipe_s2r[1]);
233 	close(pipe_s2rctl[1]);
234 	close(pipe_m2r[0]);
235 	close(pipe_m2r[1]);
236 	if ((ibuf_rde = malloc(sizeof(struct imsgbuf))) == NULL ||
237 	    (ibuf_rde_ctl = malloc(sizeof(struct imsgbuf))) == NULL ||
238 	    (ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL)
239 		fatal(NULL);
240 	imsg_init(ibuf_rde, pipe_s2r[0]);
241 	imsg_init(ibuf_rde_ctl, pipe_s2rctl[0]);
242 	imsg_init(ibuf_main, pipe_m2s[1]);
243 
244 	TAILQ_INIT(&ctl_conns);
245 	LIST_INIT(&mrthead);
246 	listener_cnt = 0;
247 	peer_cnt = 0;
248 	ctl_cnt = 0;
249 
	/* start with an empty configuration holding an empty listener list */
250 	if ((conf = calloc(1, sizeof(struct bgpd_config))) == NULL)
251 		fatal(NULL);
252 	if ((conf->listen_addrs = calloc(1, sizeof(struct listen_addrs))) ==
253 	    NULL)
254 		fatal(NULL);
255 	TAILQ_INIT(conf->listen_addrs);
256 
257 	log_info("session engine ready");
258 
259 	while (session_quit == 0) {
260 		/* check for peers to be initialized or deleted */
		/*
		 * "last" trails the walk so that a deleted peer can be
		 * unlinked from the singly linked list; "next" is saved up
		 * front because p may be freed inside the loop body.
		 */
261 		last = NULL;
262 		for (p = peers; p != NULL; p = next) {
263 			next = p->next;
264 			if (!pending_reconf) {
265 				/* cloned peer that idled out? */
266 				if (p->state == STATE_IDLE && p->conf.cloned &&
267 				    time(NULL) - p->stats.last_updown >=
268 				    INTERVAL_HOLD_CLONED)
269 					p->conf.reconf_action = RECONF_DELETE;
270 
271 				/* new peer that needs init? */
272 				if (p->state == STATE_NONE)
273 					init_peer(p);
274 
275 				/* reinit due? */
276 				if (p->conf.reconf_action == RECONF_REINIT) {
277 					session_stop(p, ERR_CEASE_ADMIN_RESET);
278 					if (!p->conf.down)
279 						timer_set(p, Timer_IdleHold, 0);
280 				}
281 
282 				/* deletion due? */
283 				if (p->conf.reconf_action == RECONF_DELETE) {
284 					if (p->demoted)
285 						session_demote(p, -1);
286 					p->conf.demote_group[0] = 0;
287 					session_stop(p, ERR_CEASE_PEER_UNCONF);
288 					log_peer_warnx(&p->conf, "removed");
289 					if (last != NULL)
290 						last->next = next;
291 					else
292 						peers = next;
293 					timer_remove_all(p);
294 					free(p);
295 					peer_cnt--;
					/* p is gone; "last" must not advance */
296 					continue;
297 				}
298 				p->conf.reconf_action = RECONF_NONE;
299 			}
300 			last = p;
301 		}
302 
		/* grow (never shrink) the pollfd-slot -> peer lookup table */
303 		if (peer_cnt > peer_l_elms) {
304 			if ((newp = realloc(peer_l, sizeof(struct peer *) *
305 			    peer_cnt)) == NULL) {
306 				/* panic for now  */
307 				log_warn("could not resize peer_l from %u -> %u"
308 				    " entries", peer_l_elms, peer_cnt);
309 				fatalx("exiting");
310 			}
311 			peer_l = newp;
312 			peer_l_elms = peer_cnt;
313 		}
314 
		/*
		 * reap mrt descriptors marked for removal and count those
		 * that have output queued; only the latter get polled
		 */
315 		mrt_cnt = 0;
316 		for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) {
317 			xm = LIST_NEXT(m, entry);
318 			if (m->state == MRT_STATE_REMOVE) {
319 				mrt_clean(m);
320 				LIST_REMOVE(m, entry);
321 				free(m);
322 				continue;
323 			}
324 			if (m->wbuf.queued)
325 				mrt_cnt++;
326 		}
327 
328 		if (mrt_cnt > mrt_l_elms) {
329 			if ((newp = realloc(mrt_l, sizeof(struct mrt *) *
330 			    mrt_cnt)) == NULL) {
331 				/* panic for now  */
332 				log_warn("could not resize mrt_l from %u -> %u"
333 				    " entries", mrt_l_elms, mrt_cnt);
334 				fatalx("exiting");
335 			}
336 			mrt_l = newp;
337 			mrt_l_elms = mrt_cnt;
338 		}
339 
		/* upper bound of descriptors that can be polled this round */
340 		new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt +
341 		    ctl_cnt + mrt_cnt;
342 		if (new_cnt > pfd_elms) {
343 			if ((newp = realloc(pfd, sizeof(struct pollfd) *
344 			    new_cnt)) == NULL) {
345 				/* panic for now  */
346 				log_warn("could not resize pfd from %u -> %u"
347 				    " entries", pfd_elms, new_cnt);
348 				fatalx("exiting");
349 			}
350 			pfd = newp;
351 			pfd_elms = new_cnt;
352 		}
353 
		/* rebuild the pollfd array from scratch: fixed slots first */
354 		bzero(pfd, sizeof(struct pollfd) * pfd_elms);
355 		pfd[PFD_PIPE_MAIN].fd = ibuf_main->fd;
356 		pfd[PFD_PIPE_MAIN].events = POLLIN;
357 		if (ibuf_main->w.queued > 0)
358 			pfd[PFD_PIPE_MAIN].events |= POLLOUT;
359 		pfd[PFD_PIPE_ROUTE].fd = ibuf_rde->fd;
360 		pfd[PFD_PIPE_ROUTE].events = POLLIN;
361 		if (ibuf_rde->w.queued > 0)
362 			pfd[PFD_PIPE_ROUTE].events |= POLLOUT;
363 
		/* total output still queued towards control clients */
364 		ctl_queued = 0;
365 		TAILQ_FOREACH(ctl_conn, &ctl_conns, entry)
366 			ctl_queued += ctl_conn->ibuf.w.queued;
367 
368 		pfd[PFD_PIPE_ROUTE_CTL].fd = ibuf_rde_ctl->fd;
369 		if (ctl_queued < SESSION_CTL_QUEUE_MAX)
370 			/*
371 			 * Do not act as unlimited buffer. Don't read in more
372 			 * messages if the ctl sockets are getting full.
373 			 */
374 			pfd[PFD_PIPE_ROUTE_CTL].events = POLLIN;
375 		pfd[PFD_SOCK_CTL].fd = csock;
376 		pfd[PFD_SOCK_CTL].events = POLLIN;
377 		pfd[PFD_SOCK_RCTL].fd = rcsock;
378 		pfd[PFD_SOCK_RCTL].events = POLLIN;
379 		pfd[PFD_SOCK_PFKEY].fd = pfkeysock;
380 		pfd[PFD_SOCK_PFKEY].events = POLLIN;
381 
		/* slots [PFD_LISTENERS_START, idx_listeners): listeners */
382 		i = PFD_LISTENERS_START;
383 		TAILQ_FOREACH(la, conf->listen_addrs, entry) {
384 			pfd[i].fd = la->fd;
385 			pfd[i].events = POLLIN;
386 			i++;
387 		}
388 		idx_listeners = i;
389 		timeout = 240;	/* loop every 240s at least */
390 
		/*
		 * slots [idx_listeners, idx_peers): peers with an open fd.
		 * At most one due timer per peer is run per iteration, and
		 * the poll timeout is shortened to the earliest pending one.
		 */
391 		for (p = peers; p != NULL; p = p->next) {
392 			time_t	nextaction;
393 			struct peer_timer *pt;
394 
395 			/* check timers */
396 			if ((pt = timer_nextisdue(p)) != NULL) {
397 				switch (pt->type) {
398 				case Timer_Hold:
399 					bgp_fsm(p, EVNT_TIMER_HOLDTIME);
400 					break;
401 				case Timer_ConnectRetry:
402 					bgp_fsm(p, EVNT_TIMER_CONNRETRY);
403 					break;
404 				case Timer_Keepalive:
405 					bgp_fsm(p, EVNT_TIMER_KEEPALIVE);
406 					break;
407 				case Timer_IdleHold:
408 					bgp_fsm(p, EVNT_START);
409 					break;
410 				case Timer_IdleHoldReset:
					/*
					 * decay the idle-hold backoff; once
					 * it is back at the initial value the
					 * error counter is cleared as well
					 */
411 					p->IdleHoldTime /= 2;
412 					if (p->IdleHoldTime <=
413 					    INTERVAL_IDLE_HOLD_INITIAL) {
414 						p->IdleHoldTime =
415 						    INTERVAL_IDLE_HOLD_INITIAL;
416 						timer_stop(p,
417 						    Timer_IdleHoldReset);
418 						p->errcnt = 0;
419 					} else
420 						timer_set(p,
421 						    Timer_IdleHoldReset,
422 						    p->IdleHoldTime);
423 					break;
424 				case Timer_CarpUndemote:
425 					timer_stop(p, Timer_CarpUndemote);
426 					if (p->demoted &&
427 					    p->state == STATE_ESTABLISHED)
428 						session_demote(p, -1);
429 					break;
430 				default:
431 					fatalx("King Bula lost in time");
432 				}
433 			}
434 			if ((nextaction = timer_nextduein(p)) != -1 &&
435 			    nextaction < timeout)
436 				timeout = nextaction;
437 
438 			/* are we waiting for a write? */
439 			events = POLLIN;
440 			if (p->wbuf.queued > 0 || p->state == STATE_CONNECT)
441 				events |= POLLOUT;
442 
443 			/* poll events */
444 			if (p->fd != -1 && events != 0) {
445 				pfd[i].fd = p->fd;
446 				pfd[i].events = events;
447 				peer_l[i - idx_listeners] = p;
448 				i++;
449 			}
450 		}
451 
452 		idx_peers = i;
453 
		/* slots [idx_peers, idx_mrts): mrt dumps with queued output */
454 		LIST_FOREACH(m, &mrthead, entry)
455 			if (m->wbuf.queued) {
456 				pfd[i].fd = m->wbuf.fd;
457 				pfd[i].events = POLLOUT;
458 				mrt_l[i - idx_peers] = m;
459 				i++;
460 			}
461 
462 		idx_mrts = i;
463 
		/* slots [idx_mrts, i): control connections */
464 		TAILQ_FOREACH(ctl_conn, &ctl_conns, entry) {
465 			pfd[i].fd = ctl_conn->ibuf.fd;
466 			pfd[i].events = POLLIN;
467 			if (ctl_conn->ibuf.w.queued > 0)
468 				pfd[i].events |= POLLOUT;
469 			i++;
470 		}
471 
472 		if (timeout < 0)
473 			timeout = 0;
		/*
		 * an interrupted poll leaves nfds at -1, so every
		 * "nfds > 0" check below is skipped for this round
		 */
474 		if ((nfds = poll(pfd, i, timeout * 1000)) == -1)
475 			if (errno != EINTR)
476 				fatal("poll error");
477 
		/*
		 * nfds is used as a budget of descriptors still to handle;
		 * it is decremented for each handled POLLIN on the fixed
		 * slots and listeners, and by whatever the dispatch helpers
		 * return for peers and control connections
		 */
478 		if (nfds > 0 && pfd[PFD_PIPE_MAIN].revents & POLLOUT)
479 			if (msgbuf_write(&ibuf_main->w) < 0)
480 				fatal("pipe write error");
481 
482 		if (nfds > 0 && pfd[PFD_PIPE_MAIN].revents & POLLIN) {
483 			nfds--;
484 			session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN,
485 			    &listener_cnt);
486 		}
487 
488 		if (nfds > 0 && pfd[PFD_PIPE_ROUTE].revents & POLLOUT)
489 			if (msgbuf_write(&ibuf_rde->w) < 0)
490 				fatal("pipe write error");
491 
492 		if (nfds > 0 && pfd[PFD_PIPE_ROUTE].revents & POLLIN) {
493 			nfds--;
494 			session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE,
495 			    &listener_cnt);
496 		}
497 
498 		if (nfds > 0 && pfd[PFD_PIPE_ROUTE_CTL].revents & POLLIN) {
499 			nfds--;
500 			session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL,
501 			    &listener_cnt);
502 		}
503 
504 		if (nfds > 0 && pfd[PFD_SOCK_CTL].revents & POLLIN) {
505 			nfds--;
506 			ctl_cnt += control_accept(csock, 0);
507 		}
508 
509 		if (nfds > 0 && pfd[PFD_SOCK_RCTL].revents & POLLIN) {
510 			nfds--;
511 			ctl_cnt += control_accept(rcsock, 1);
512 		}
513 
514 		if (nfds > 0 && pfd[PFD_SOCK_PFKEY].revents & POLLIN) {
515 			nfds--;
516 			if (pfkey_read(pfkeysock, NULL) == -1) {
517 				log_warnx("pfkey_read failed, exiting...");
518 				session_quit = 1;
519 			}
520 		}
521 
		/* j keeps running across the four loops below, one per range */
522 		for (j = PFD_LISTENERS_START; nfds > 0 && j < idx_listeners;
523 		    j++)
524 			if (pfd[j].revents & POLLIN) {
525 				nfds--;
526 				session_accept(pfd[j].fd);
527 			}
528 
529 		for (; nfds > 0 && j < idx_peers; j++)
530 			nfds -= session_dispatch_msg(&pfd[j],
531 			    peer_l[j - idx_listeners]);
532 
533 		for (; nfds > 0 && j < idx_mrts; j++)
534 			if (pfd[j].revents & POLLOUT) {
535 				nfds--;
536 				mrt_write(mrt_l[j - idx_peers]);
537 			}
538 
539 		for (; nfds > 0 && j < i; j++)
540 			nfds -= control_dispatch_msg(&pfd[j], &ctl_cnt);
541 	}
542 
	/* shutdown: stop and free every peer */
543 	while ((p = peers) != NULL) {
544 		peers = p->next;
545 		session_stop(p, ERR_CEASE_ADMIN_DOWN);
546 		pfkey_remove(p);
547 		free(p);
548 	}
549 
550 	while ((m = LIST_FIRST(&mrthead)) != NULL) {
551 		mrt_clean(m);
552 		LIST_REMOVE(m, entry);
553 		free(m);
554 	}
555 
556 	while ((la = TAILQ_FIRST(conf->listen_addrs)) != NULL) {
557 		TAILQ_REMOVE(conf->listen_addrs, la, entry);
558 		free(la);
559 	}
560 	free(conf->listen_addrs);
561 	free(peer_l);
562 	free(mrt_l);
563 	free(pfd);
564 
	/*
	 * one last attempt to flush what is queued for the RDE and the
	 * parent; the results are not checked.
	 * NOTE(review): ibuf_rde_ctl and conf itself are not released here.
	 */
565 	msgbuf_write(&ibuf_rde->w);
566 	msgbuf_clear(&ibuf_rde->w);
567 	free(ibuf_rde);
568 	msgbuf_write(&ibuf_main->w);
569 	msgbuf_clear(&ibuf_main->w);
570 	free(ibuf_main);
571 
572 	control_shutdown(csock);
573 	control_shutdown(rcsock);
574 	log_info("session engine exiting");
575 	_exit(0);
576 }
577 
578 void
579 init_conf(struct bgpd_config *c)
580 {
581 	if (!c->holdtime)
582 		c->holdtime = INTERVAL_HOLD;
583 	if (!c->connectretry)
584 		c->connectretry = INTERVAL_CONNECTRETRY;
585 }
586 
587 void
588 init_peer(struct peer *p)
589 {
590 	TAILQ_INIT(&p->timers);
591 	p->fd = p->wbuf.fd = -1;
592 
593 	if (p->conf.if_depend[0])
594 		imsg_compose(ibuf_main, IMSG_IFINFO, 0, 0, -1,
595 		    p->conf.if_depend, sizeof(p->conf.if_depend));
596 	else
597 		p->depend_ok = 1;
598 
599 	peer_cnt++;
600 
601 	change_state(p, STATE_IDLE, EVNT_NONE);
602 	if (p->conf.down)
603 		timer_stop(p, Timer_IdleHold);		/* no autostart */
604 	else
605 		timer_set(p, Timer_IdleHold, 0);	/* start ASAP */
606 
607 	/*
608 	 * on startup, demote if requested.
609 	 * do not handle new peers. they must reach ESTABLISHED beforehands.
610 	 * peers added at runtime have reconf_action set to RECONF_REINIT.
611 	 */
612 	if (p->conf.reconf_action != RECONF_REINIT && p->conf.demote_group[0])
613 		session_demote(p, +1);
614 }
615 
616 void
617 bgp_fsm(struct peer *peer, enum session_events event)
618 {
619 	switch (peer->state) {
620 	case STATE_NONE:
621 		/* nothing */
622 		break;
623 	case STATE_IDLE:
624 		switch (event) {
625 		case EVNT_START:
626 			timer_stop(peer, Timer_Hold);
627 			timer_stop(peer, Timer_Keepalive);
628 			timer_stop(peer, Timer_IdleHold);
629 
630 			/* allocate read buffer */
631 			peer->rbuf = calloc(1, sizeof(struct ibuf_read));
632 			if (peer->rbuf == NULL)
633 				fatal(NULL);
634 
635 			/* init write buffer */
636 			msgbuf_init(&peer->wbuf);
637 
638 			/* init pfkey - remove old if any, load new ones */
639 			pfkey_remove(peer);
640 			if (pfkey_establish(peer) == -1) {
641 				log_peer_warnx(&peer->conf,
642 				    "pfkey setup failed");
643 				return;
644 			}
645 
646 			peer->stats.last_sent_errcode = 0;
647 			peer->stats.last_sent_suberr = 0;
648 
649 			if (!peer->depend_ok)
650 				timer_stop(peer, Timer_ConnectRetry);
651 			else if (peer->passive || peer->conf.passive ||
652 			    peer->conf.template) {
653 				change_state(peer, STATE_ACTIVE, event);
654 				timer_stop(peer, Timer_ConnectRetry);
655 			} else {
656 				change_state(peer, STATE_CONNECT, event);
657 				timer_set(peer, Timer_ConnectRetry,
658 				    conf->connectretry);
659 				session_connect(peer);
660 			}
661 			peer->passive = 0;
662 			break;
663 		default:
664 			/* ignore */
665 			break;
666 		}
667 		break;
668 	case STATE_CONNECT:
669 		switch (event) {
670 		case EVNT_START:
671 			/* ignore */
672 			break;
673 		case EVNT_CON_OPEN:
674 			session_tcp_established(peer);
675 			session_open(peer);
676 			timer_stop(peer, Timer_ConnectRetry);
677 			peer->holdtime = INTERVAL_HOLD_INITIAL;
678 			start_timer_holdtime(peer);
679 			change_state(peer, STATE_OPENSENT, event);
680 			break;
681 		case EVNT_CON_OPENFAIL:
682 			timer_set(peer, Timer_ConnectRetry,
683 			    conf->connectretry);
684 			session_close_connection(peer);
685 			change_state(peer, STATE_ACTIVE, event);
686 			break;
687 		case EVNT_TIMER_CONNRETRY:
688 			timer_set(peer, Timer_ConnectRetry,
689 			    conf->connectretry);
690 			session_connect(peer);
691 			break;
692 		default:
693 			change_state(peer, STATE_IDLE, event);
694 			break;
695 		}
696 		break;
697 	case STATE_ACTIVE:
698 		switch (event) {
699 		case EVNT_START:
700 			/* ignore */
701 			break;
702 		case EVNT_CON_OPEN:
703 			session_tcp_established(peer);
704 			session_open(peer);
705 			timer_stop(peer, Timer_ConnectRetry);
706 			peer->holdtime = INTERVAL_HOLD_INITIAL;
707 			start_timer_holdtime(peer);
708 			change_state(peer, STATE_OPENSENT, event);
709 			break;
710 		case EVNT_CON_OPENFAIL:
711 			timer_set(peer, Timer_ConnectRetry,
712 			    conf->connectretry);
713 			session_close_connection(peer);
714 			change_state(peer, STATE_ACTIVE, event);
715 			break;
716 		case EVNT_TIMER_CONNRETRY:
717 			timer_set(peer, Timer_ConnectRetry,
718 			    peer->holdtime);
719 			change_state(peer, STATE_CONNECT, event);
720 			session_connect(peer);
721 			break;
722 		default:
723 			change_state(peer, STATE_IDLE, event);
724 			break;
725 		}
726 		break;
727 	case STATE_OPENSENT:
728 		switch (event) {
729 		case EVNT_START:
730 			/* ignore */
731 			break;
732 		case EVNT_STOP:
733 			change_state(peer, STATE_IDLE, event);
734 			break;
735 		case EVNT_CON_CLOSED:
736 			session_close_connection(peer);
737 			timer_set(peer, Timer_ConnectRetry,
738 			    conf->connectretry);
739 			change_state(peer, STATE_ACTIVE, event);
740 			break;
741 		case EVNT_CON_FATAL:
742 			change_state(peer, STATE_IDLE, event);
743 			break;
744 		case EVNT_TIMER_HOLDTIME:
745 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
746 			    0, NULL, 0);
747 			change_state(peer, STATE_IDLE, event);
748 			break;
749 		case EVNT_RCVD_OPEN:
750 			/* parse_open calls change_state itself on failure */
751 			if (parse_open(peer))
752 				break;
753 			session_keepalive(peer);
754 			change_state(peer, STATE_OPENCONFIRM, event);
755 			break;
756 		case EVNT_RCVD_NOTIFICATION:
757 			if (parse_notification(peer)) {
758 				change_state(peer, STATE_IDLE, event);
759 				/* don't punish, capa negotiation */
760 				timer_set(peer, Timer_IdleHold, 0);
761 				peer->IdleHoldTime /= 2;
762 			} else
763 				change_state(peer, STATE_IDLE, event);
764 			break;
765 		default:
766 			session_notification(peer,
767 			    ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL, 0);
768 			change_state(peer, STATE_IDLE, event);
769 			break;
770 		}
771 		break;
772 	case STATE_OPENCONFIRM:
773 		switch (event) {
774 		case EVNT_START:
775 			/* ignore */
776 			break;
777 		case EVNT_STOP:
778 			change_state(peer, STATE_IDLE, event);
779 			break;
780 		case EVNT_CON_CLOSED:
781 		case EVNT_CON_FATAL:
782 			change_state(peer, STATE_IDLE, event);
783 			break;
784 		case EVNT_TIMER_HOLDTIME:
785 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
786 			    0, NULL, 0);
787 			change_state(peer, STATE_IDLE, event);
788 			break;
789 		case EVNT_TIMER_KEEPALIVE:
790 			session_keepalive(peer);
791 			break;
792 		case EVNT_RCVD_KEEPALIVE:
793 			start_timer_holdtime(peer);
794 			change_state(peer, STATE_ESTABLISHED, event);
795 			break;
796 		case EVNT_RCVD_NOTIFICATION:
797 			parse_notification(peer);
798 			change_state(peer, STATE_IDLE, event);
799 			break;
800 		default:
801 			session_notification(peer,
802 			    ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL, 0);
803 			change_state(peer, STATE_IDLE, event);
804 			break;
805 		}
806 		break;
807 	case STATE_ESTABLISHED:
808 		switch (event) {
809 		case EVNT_START:
810 			/* ignore */
811 			break;
812 		case EVNT_STOP:
813 			change_state(peer, STATE_IDLE, event);
814 			break;
815 		case EVNT_CON_CLOSED:
816 		case EVNT_CON_FATAL:
817 			change_state(peer, STATE_IDLE, event);
818 			break;
819 		case EVNT_TIMER_HOLDTIME:
820 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
821 			    0, NULL, 0);
822 			change_state(peer, STATE_IDLE, event);
823 			break;
824 		case EVNT_TIMER_KEEPALIVE:
825 			session_keepalive(peer);
826 			break;
827 		case EVNT_RCVD_KEEPALIVE:
828 			start_timer_holdtime(peer);
829 			break;
830 		case EVNT_RCVD_UPDATE:
831 			start_timer_holdtime(peer);
832 			if (parse_update(peer))
833 				change_state(peer, STATE_IDLE, event);
834 			else
835 				start_timer_holdtime(peer);
836 			break;
837 		case EVNT_RCVD_NOTIFICATION:
838 			parse_notification(peer);
839 			change_state(peer, STATE_IDLE, event);
840 			break;
841 		default:
842 			session_notification(peer,
843 			    ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL, 0);
844 			change_state(peer, STATE_IDLE, event);
845 			break;
846 		}
847 		break;
848 	}
849 }
850 
851 void
852 start_timer_holdtime(struct peer *peer)
853 {
854 	if (peer->holdtime > 0)
855 		timer_set(peer, Timer_Hold, peer->holdtime);
856 	else
857 		timer_stop(peer, Timer_Hold);
858 }
859 
860 void
861 start_timer_keepalive(struct peer *peer)
862 {
863 	if (peer->holdtime > 0)
864 		timer_set(peer, Timer_Keepalive, peer->holdtime / 3);
865 	else
866 		timer_stop(peer, Timer_Keepalive);
867 }
868 
869 void
870 session_close_connection(struct peer *peer)
871 {
872 	if (peer->fd != -1)
873 		close(peer->fd);
874 
875 	peer->fd = peer->wbuf.fd = -1;
876 }
877 
878 void
879 change_state(struct peer *peer, enum session_state state,
880     enum session_events event)
881 {
882 	struct mrt	*mrt;
883 
884 	switch (state) {
885 	case STATE_IDLE:
886 		/* carp demotion first. new peers handled in init_peer */
887 		if (peer->state == STATE_ESTABLISHED &&
888 		    peer->conf.demote_group[0] && !peer->demoted)
889 			session_demote(peer, +1);
890 
891 		/*
892 		 * try to write out what's buffered (maybe a notification),
893 		 * don't bother if it fails
894 		 */
895 		if (peer->state >= STATE_OPENSENT && peer->wbuf.queued)
896 			msgbuf_write(&peer->wbuf);
897 
898 		/*
899 		 * we must start the timer for the next EVNT_START
900 		 * if we are coming here due to an error and the
901 		 * session was not established successfully before, the
902 		 * starttimerinterval needs to be exponentially increased
903 		 */
904 		if (peer->IdleHoldTime == 0)
905 			peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL;
906 		peer->holdtime = INTERVAL_HOLD_INITIAL;
907 		timer_stop(peer, Timer_ConnectRetry);
908 		timer_stop(peer, Timer_Keepalive);
909 		timer_stop(peer, Timer_Hold);
910 		timer_stop(peer, Timer_IdleHold);
911 		timer_stop(peer, Timer_IdleHoldReset);
912 		session_close_connection(peer);
913 		msgbuf_clear(&peer->wbuf);
914 		free(peer->rbuf);
915 		peer->rbuf = NULL;
916 		bzero(&peer->capa.peer, sizeof(peer->capa.peer));
917 		if (peer->state == STATE_ESTABLISHED)
918 			session_down(peer);
919 		if (event != EVNT_STOP) {
920 			timer_set(peer, Timer_IdleHold, peer->IdleHoldTime);
921 			if (event != EVNT_NONE &&
922 			    peer->IdleHoldTime < MAX_IDLE_HOLD/2)
923 				peer->IdleHoldTime *= 2;
924 		}
925 		if (peer->state == STATE_NONE ||
926 		    peer->state == STATE_ESTABLISHED) {
927 			/* initialize capability negotiation structures */
928 			memcpy(&peer->capa.ann, &peer->conf.capabilities,
929 			    sizeof(peer->capa.ann));
930 			if (!peer->conf.announce_capa)
931 				session_capa_ann_none(peer);
932 		}
933 		break;
934 	case STATE_CONNECT:
935 		break;
936 	case STATE_ACTIVE:
937 		break;
938 	case STATE_OPENSENT:
939 		break;
940 	case STATE_OPENCONFIRM:
941 		break;
942 	case STATE_ESTABLISHED:
943 		timer_set(peer, Timer_IdleHoldReset, peer->IdleHoldTime);
944 		if (peer->demoted)
945 			timer_set(peer, Timer_CarpUndemote,
946 			    INTERVAL_HOLD_DEMOTED);
947 		session_up(peer);
948 		break;
949 	default:		/* something seriously fucked */
950 		break;
951 	}
952 
953 	log_statechange(peer, state, event);
954 	LIST_FOREACH(mrt, &mrthead, entry) {
955 		if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT))
956 			continue;
957 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
958 		    mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
959 		    mrt->group_id == peer->conf.groupid))
960 			mrt_dump_state(mrt, peer->state, state, peer);
961 	}
962 	peer->prev_state = peer->state;
963 	peer->state = state;
964 }
965 
966 void
967 session_accept(int listenfd)
968 {
969 	int			 connfd;
970 	int			 opt;
971 	socklen_t		 len;
972 	struct sockaddr_storage	 cliaddr;
973 	struct peer		*p = NULL;
974 
975 	len = sizeof(cliaddr);
976 	if ((connfd = accept(listenfd,
977 	    (struct sockaddr *)&cliaddr, &len)) == -1) {
978 		if (errno == EWOULDBLOCK || errno == EINTR)
979 			return;
980 		else
981 			log_warn("accept");
982 	}
983 
984 	p = getpeerbyip((struct sockaddr *)&cliaddr);
985 
986 	if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) {
987 		if (timer_running(p, Timer_IdleHold, NULL)) {
988 			/* fast reconnect after clear */
989 			p->passive = 1;
990 			bgp_fsm(p, EVNT_START);
991 		}
992 	}
993 
994 	if (p != NULL &&
995 	    (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) {
996 		if (p->fd != -1) {
997 			if (p->state == STATE_CONNECT)
998 				session_close_connection(p);
999 			else {
1000 				close(connfd);
1001 				return;
1002 			}
1003 		}
1004 
1005 		if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1006 			log_peer_warnx(&p->conf,
1007 			    "ipsec or md5sig configured but not available");
1008 			close(connfd);
1009 			return;
1010 		}
1011 
1012 		if (p->conf.auth.method == AUTH_MD5SIG) {
1013 			if (sysdep.no_md5sig) {
1014 				log_peer_warnx(&p->conf,
1015 				    "md5sig configured but not available");
1016 				close(connfd);
1017 				return;
1018 			}
1019 			len = sizeof(opt);
1020 			if (getsockopt(connfd, IPPROTO_TCP, TCP_MD5SIG,
1021 			    &opt, &len) == -1)
1022 				fatal("getsockopt TCP_MD5SIG");
1023 			if (!opt) {	/* non-md5'd connection! */
1024 				log_peer_warnx(&p->conf,
1025 				    "connection attempt without md5 signature");
1026 				close(connfd);
1027 				return;
1028 			}
1029 		}
1030 		p->fd = p->wbuf.fd = connfd;
1031 		if (session_setup_socket(p)) {
1032 			close(connfd);
1033 			return;
1034 		}
1035 		session_socket_blockmode(connfd, BM_NONBLOCK);
1036 		bgp_fsm(p, EVNT_CON_OPEN);
1037 	} else {
1038 		log_conn_attempt(p, (struct sockaddr *)&cliaddr);
1039 		close(connfd);
1040 	}
1041 }
1042 
1043 int
1044 session_connect(struct peer *peer)
1045 {
1046 	int			 opt = 1;
1047 	struct sockaddr		*sa;
1048 
1049 	/*
1050 	 * we do not need the overcomplicated collision detection RFC 1771
1051 	 * describes; we simply make sure there is only ever one concurrent
1052 	 * tcp connection per peer.
1053 	 */
1054 	if (peer->fd != -1)
1055 		return (-1);
1056 
1057 	if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid), SOCK_STREAM,
1058 	    IPPROTO_TCP)) == -1) {
1059 		log_peer_warn(&peer->conf, "session_connect socket");
1060 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1061 		return (-1);
1062 	}
1063 
1064 	if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1065 		log_peer_warnx(&peer->conf,
1066 		    "ipsec or md5sig configured but not available");
1067 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1068 		return (-1);
1069 	}
1070 
1071 	if (peer->conf.auth.method == AUTH_MD5SIG) {
1072 		if (sysdep.no_md5sig) {
1073 			log_peer_warnx(&peer->conf,
1074 			    "md5sig configured but not available");
1075 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1076 			return (-1);
1077 		}
1078 		if (setsockopt(peer->fd, IPPROTO_TCP, TCP_MD5SIG,
1079 		    &opt, sizeof(opt)) == -1) {
1080 			log_peer_warn(&peer->conf, "setsockopt md5sig");
1081 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1082 			return (-1);
1083 		}
1084 	}
1085 	peer->wbuf.fd = peer->fd;
1086 
1087 	/* if update source is set we need to bind() */
1088 	if ((sa = addr2sa(&peer->conf.local_addr, 0)) != NULL) {
1089 		if (bind(peer->fd, sa, sa->sa_len) == -1) {
1090 			log_peer_warn(&peer->conf, "session_connect bind");
1091 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1092 			return (-1);
1093 		}
1094 	}
1095 
1096 	if (session_setup_socket(peer)) {
1097 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1098 		return (-1);
1099 	}
1100 
1101 	session_socket_blockmode(peer->fd, BM_NONBLOCK);
1102 
1103 	sa = addr2sa(&peer->conf.remote_addr, BGP_PORT);
1104 	if (connect(peer->fd, sa, sa->sa_len) == -1) {
1105 		if (errno != EINPROGRESS) {
1106 			if (errno != peer->lasterr)
1107 				log_peer_warn(&peer->conf, "connect");
1108 			peer->lasterr = errno;
1109 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1110 			return (-1);
1111 		}
1112 	} else
1113 		bgp_fsm(peer, EVNT_CON_OPEN);
1114 
1115 	return (0);
1116 }
1117 
1118 int
1119 session_setup_socket(struct peer *p)
1120 {
1121 	int	ttl = p->conf.distance;
1122 	int	pre = IPTOS_PREC_INTERNETCONTROL;
1123 	int	nodelay = 1;
1124 	int	bsize;
1125 
1126 	switch (p->conf.remote_addr.aid) {
1127 	case AID_INET:
1128 		/* set precedence, see RFC 1771 appendix 5 */
1129 		if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) ==
1130 		    -1) {
1131 			log_peer_warn(&p->conf,
1132 			    "session_setup_socket setsockopt TOS");
1133 			return (-1);
1134 		}
1135 
1136 		if (p->conf.ebgp) {
1137 			/* set TTL to foreign router's distance
1138 			   1=direct n=multihop with ttlsec, we always use 255 */
1139 			if (p->conf.ttlsec) {
1140 				ttl = 256 - p->conf.distance;
1141 				if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL,
1142 				    &ttl, sizeof(ttl)) == -1) {
1143 					log_peer_warn(&p->conf,
1144 					    "session_setup_socket: "
1145 					    "setsockopt MINTTL");
1146 					return (-1);
1147 				}
1148 				ttl = 255;
1149 			}
1150 
1151 			if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl,
1152 			    sizeof(ttl)) == -1) {
1153 				log_peer_warn(&p->conf,
1154 				    "session_setup_socket setsockopt TTL");
1155 				return (-1);
1156 			}
1157 		}
1158 		break;
1159 	case AID_INET6:
1160 		if (p->conf.ebgp) {
1161 			/* set hoplimit to foreign router's distance */
1162 			if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
1163 			    &ttl, sizeof(ttl)) == -1) {
1164 				log_peer_warn(&p->conf,
1165 				    "session_setup_socket setsockopt hoplimit");
1166 				return (-1);
1167 			}
1168 		}
1169 		break;
1170 	}
1171 
1172 	/* set TCP_NODELAY */
1173 	if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay,
1174 	    sizeof(nodelay)) == -1) {
1175 		log_peer_warn(&p->conf,
1176 		    "session_setup_socket setsockopt TCP_NODELAY");
1177 		return (-1);
1178 	}
1179 
1180 	/* only increase bufsize (and thus window) if md5 or ipsec is in use */
1181 	if (p->conf.auth.method != AUTH_NONE) {
1182 		/* try to increase bufsize. no biggie if it fails */
1183 		bsize = 65535;
1184 		while (setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize,
1185 		    sizeof(bsize)) == -1)
1186 			bsize /= 2;
1187 		bsize = 65535;
1188 		while (setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize,
1189 		    sizeof(bsize)) == -1)
1190 			bsize /= 2;
1191 	}
1192 
1193 	return (0);
1194 }
1195 
1196 void
1197 session_socket_blockmode(int fd, enum blockmodes bm)
1198 {
1199 	int	flags;
1200 
1201 	if ((flags = fcntl(fd, F_GETFL, 0)) == -1)
1202 		fatal("fcntl F_GETFL");
1203 
1204 	if (bm == BM_NONBLOCK)
1205 		flags |= O_NONBLOCK;
1206 	else
1207 		flags &= ~O_NONBLOCK;
1208 
1209 	if ((flags = fcntl(fd, F_SETFL, flags)) == -1)
1210 		fatal("fcntl F_SETFL");
1211 }
1212 
1213 void
1214 session_tcp_established(struct peer *peer)
1215 {
1216 	socklen_t	len;
1217 
1218 	len = sizeof(peer->sa_local);
1219 	if (getsockname(peer->fd, (struct sockaddr *)&peer->sa_local,
1220 	    &len) == -1)
1221 		log_warn("getsockname");
1222 	len = sizeof(peer->sa_remote);
1223 	if (getpeername(peer->fd, (struct sockaddr *)&peer->sa_remote,
1224 	    &len) == -1)
1225 		log_warn("getpeername");
1226 }
1227 
1228 void
1229 session_capa_ann_none(struct peer *peer)
1230 {
1231 	bzero(&peer->capa.ann, sizeof(peer->capa.ann));
1232 }
1233 
/*
 * Append a capability header (code byte followed by length byte) to opb.
 * Returns the sum of the ibuf_add() results, i.e. 0 if both succeeded.
 */
int
session_capa_add(struct ibuf *opb, u_int8_t capa_code, u_int8_t capa_len)
{
	int	failed;

	failed = ibuf_add(opb, &capa_code, sizeof(capa_code));
	failed += ibuf_add(opb, &capa_len, sizeof(capa_len));

	return (failed);
}
1243 
/*
 * Append the 4 byte multiprotocol capability value for aid to buf:
 * AFI in network byte order, one zero byte, SAFI.
 * Calls fatalx() if aid has no AFI/SAFI mapping.
 * Returns the sum of the ibuf_add() results, i.e. 0 if all succeeded.
 */
int
session_capa_add_mp(struct ibuf *buf, u_int8_t aid)
{
	u_int16_t		 afi;
	u_int8_t		 safi;
	u_int8_t		 reserved = 0;
	int			 failed;

	if (aid2afi(aid, &afi, &safi) == -1)
		fatalx("session_capa_add_mp: bad afi/safi pair");

	afi = htons(afi);
	failed = ibuf_add(buf, &afi, sizeof(afi));
	failed += ibuf_add(buf, &reserved, sizeof(reserved));
	failed += ibuf_add(buf, &safi, sizeof(safi));

	return (failed);
}
1260 
1261 struct bgp_msg *
1262 session_newmsg(enum msg_type msgtype, u_int16_t len)
1263 {
1264 	struct bgp_msg		*msg;
1265 	struct msg_header	 hdr;
1266 	struct ibuf		*buf;
1267 	int			 errs = 0;
1268 
1269 	memset(&hdr.marker, 0xff, sizeof(hdr.marker));
1270 	hdr.len = htons(len);
1271 	hdr.type = msgtype;
1272 
1273 	if ((buf = ibuf_open(len)) == NULL)
1274 		return (NULL);
1275 
1276 	errs += ibuf_add(buf, &hdr.marker, sizeof(hdr.marker));
1277 	errs += ibuf_add(buf, &hdr.len, sizeof(hdr.len));
1278 	errs += ibuf_add(buf, &hdr.type, sizeof(hdr.type));
1279 
1280 	if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) {
1281 		ibuf_free(buf);
1282 		return (NULL);
1283 	}
1284 
1285 	msg->buf = buf;
1286 	msg->type = msgtype;
1287 	msg->len = len;
1288 
1289 	return (msg);
1290 }
1291 
1292 int
1293 session_sendmsg(struct bgp_msg *msg, struct peer *p)
1294 {
1295 	struct mrt		*mrt;
1296 
1297 	LIST_FOREACH(mrt, &mrthead, entry) {
1298 		if (!(mrt->type == MRT_ALL_OUT || (msg->type == UPDATE &&
1299 		    mrt->type == MRT_UPDATE_OUT)))
1300 			continue;
1301 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1302 		    mrt->peer_id == p->conf.id || (mrt->group_id == 0 &&
1303 		    mrt->group_id == p->conf.groupid))
1304 			mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p);
1305 	}
1306 
1307 	ibuf_close(&p->wbuf, msg->buf);
1308 	free(msg);
1309 	return (0);
1310 }
1311 
1312 void
1313 session_open(struct peer *p)
1314 {
1315 	struct bgp_msg		*buf;
1316 	struct ibuf		*opb;
1317 	struct msg_open		 msg;
1318 	u_int16_t		 len;
1319 	u_int8_t		 i, op_type, optparamlen = 0;
1320 	int			 errs = 0;
1321 
1322 
1323 	if ((opb = ibuf_dynamic(0, UCHAR_MAX - sizeof(op_type) -
1324 	    sizeof(optparamlen))) == NULL) {
1325 		bgp_fsm(p, EVNT_CON_FATAL);
1326 		return;
1327 	}
1328 
1329 	/* multiprotocol extensions, RFC 4760 */
1330 	for (i = 0; i < AID_MAX; i++)
1331 		if (p->capa.ann.mp[i]) {	/* 4 bytes data */
1332 			errs += session_capa_add(opb, CAPA_MP, 4);
1333 			errs += session_capa_add_mp(opb, i);
1334 		}
1335 
1336 	/* route refresh, RFC 2918 */
1337 	if (p->capa.ann.refresh)	/* no data */
1338 		errs += session_capa_add(opb, CAPA_REFRESH, 0);
1339 
1340 	/* End-of-RIB marker, RFC 4724 */
1341 	if (p->capa.ann.restart) {	/* 2 bytes data */
1342 		u_char		c[2];
1343 
1344 		c[0] = 0x80; /* we're always restarting */
1345 		c[1] = 0;
1346 		errs += session_capa_add(opb, CAPA_RESTART, 2);
1347 		errs += ibuf_add(opb, &c, 2);
1348 	}
1349 
1350 	/* 4-bytes AS numbers, draft-ietf-idr-as4bytes-13 */
1351 	if (p->capa.ann.as4byte) {	/* 4 bytes data */
1352 		u_int32_t	nas;
1353 
1354 		nas = htonl(conf->as);
1355 		errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(nas));
1356 		errs += ibuf_add(opb, &nas, sizeof(nas));
1357 	}
1358 
1359 	if (ibuf_size(opb))
1360 		optparamlen = ibuf_size(opb) + sizeof(op_type) +
1361 		    sizeof(optparamlen);
1362 
1363 	len = MSGSIZE_OPEN_MIN + optparamlen;
1364 	if (errs || (buf = session_newmsg(OPEN, len)) == NULL) {
1365 		ibuf_free(opb);
1366 		bgp_fsm(p, EVNT_CON_FATAL);
1367 		return;
1368 	}
1369 
1370 	msg.version = 4;
1371 	msg.myas = htons(conf->short_as);
1372 	if (p->conf.holdtime)
1373 		msg.holdtime = htons(p->conf.holdtime);
1374 	else
1375 		msg.holdtime = htons(conf->holdtime);
1376 	msg.bgpid = conf->bgpid;	/* is already in network byte order */
1377 	msg.optparamlen = optparamlen;
1378 
1379 	errs += ibuf_add(buf->buf, &msg.version, sizeof(msg.version));
1380 	errs += ibuf_add(buf->buf, &msg.myas, sizeof(msg.myas));
1381 	errs += ibuf_add(buf->buf, &msg.holdtime, sizeof(msg.holdtime));
1382 	errs += ibuf_add(buf->buf, &msg.bgpid, sizeof(msg.bgpid));
1383 	errs += ibuf_add(buf->buf, &msg.optparamlen, sizeof(msg.optparamlen));
1384 
1385 	if (optparamlen) {
1386 		op_type = OPT_PARAM_CAPABILITIES;
1387 		optparamlen = ibuf_size(opb);
1388 		errs += ibuf_add(buf->buf, &op_type, sizeof(op_type));
1389 		errs += ibuf_add(buf->buf, &optparamlen, sizeof(optparamlen));
1390 		errs += ibuf_add(buf->buf, opb->buf, ibuf_size(opb));
1391 	}
1392 
1393 	ibuf_free(opb);
1394 
1395 	if (errs) {
1396 		ibuf_free(buf->buf);
1397 		free(buf);
1398 		bgp_fsm(p, EVNT_CON_FATAL);
1399 		return;
1400 	}
1401 
1402 	if (session_sendmsg(buf, p) == -1) {
1403 		bgp_fsm(p, EVNT_CON_FATAL);
1404 		return;
1405 	}
1406 
1407 	p->stats.msg_sent_open++;
1408 }
1409 
1410 void
1411 session_keepalive(struct peer *p)
1412 {
1413 	struct bgp_msg		*buf;
1414 
1415 	if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL ||
1416 	    session_sendmsg(buf, p) == -1) {
1417 		bgp_fsm(p, EVNT_CON_FATAL);
1418 		return;
1419 	}
1420 
1421 	start_timer_keepalive(p);
1422 	p->stats.msg_sent_keepalive++;
1423 }
1424 
1425 void
1426 session_update(u_int32_t peerid, void *data, size_t datalen)
1427 {
1428 	struct peer		*p;
1429 	struct bgp_msg		*buf;
1430 
1431 	if ((p = getpeerbyid(peerid)) == NULL) {
1432 		log_warnx("no such peer: id=%u", peerid);
1433 		return;
1434 	}
1435 
1436 	if (p->state != STATE_ESTABLISHED)
1437 		return;
1438 
1439 	if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + datalen)) == NULL) {
1440 		bgp_fsm(p, EVNT_CON_FATAL);
1441 		return;
1442 	}
1443 
1444 	if (ibuf_add(buf->buf, data, datalen)) {
1445 		ibuf_free(buf->buf);
1446 		free(buf);
1447 		bgp_fsm(p, EVNT_CON_FATAL);
1448 		return;
1449 	}
1450 
1451 	if (session_sendmsg(buf, p) == -1) {
1452 		bgp_fsm(p, EVNT_CON_FATAL);
1453 		return;
1454 	}
1455 
1456 	start_timer_keepalive(p);
1457 	p->stats.msg_sent_update++;
1458 }
1459 
1460 void
1461 session_notification(struct peer *p, u_int8_t errcode, u_int8_t subcode,
1462     void *data, ssize_t datalen)
1463 {
1464 	struct bgp_msg		*buf;
1465 	int			 errs = 0;
1466 
1467 	if (p->stats.last_sent_errcode)	/* some notification already sent */
1468 		return;
1469 
1470 	log_notification(p, errcode, subcode, data, datalen, "sending");
1471 
1472 	if ((buf = session_newmsg(NOTIFICATION,
1473 	    MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) {
1474 		bgp_fsm(p, EVNT_CON_FATAL);
1475 		return;
1476 	}
1477 
1478 	errs += ibuf_add(buf->buf, &errcode, sizeof(errcode));
1479 	errs += ibuf_add(buf->buf, &subcode, sizeof(subcode));
1480 
1481 	if (datalen > 0)
1482 		errs += ibuf_add(buf->buf, data, datalen);
1483 
1484 	if (errs) {
1485 		ibuf_free(buf->buf);
1486 		free(buf);
1487 		bgp_fsm(p, EVNT_CON_FATAL);
1488 		return;
1489 	}
1490 
1491 	if (session_sendmsg(buf, p) == -1) {
1492 		bgp_fsm(p, EVNT_CON_FATAL);
1493 		return;
1494 	}
1495 
1496 	p->stats.msg_sent_notification++;
1497 	p->stats.last_sent_errcode = errcode;
1498 	p->stats.last_sent_suberr = subcode;
1499 }
1500 
1501 int
1502 session_neighbor_rrefresh(struct peer *p)
1503 {
1504 	u_int8_t	i;
1505 
1506 	if (!p->capa.peer.refresh)
1507 		return (-1);
1508 
1509 	for (i = 0; i < AID_MAX; i++) {
1510 		if (p->capa.peer.mp[i] != 0)
1511 			session_rrefresh(p, i);
1512 	}
1513 
1514 	return (0);
1515 }
1516 
1517 void
1518 session_rrefresh(struct peer *p, u_int8_t aid)
1519 {
1520 	struct bgp_msg		*buf;
1521 	int			 errs = 0;
1522 	u_int16_t		 afi;
1523 	u_int8_t		 safi, null8 = 0;
1524 
1525 	if (aid2afi(aid, &afi, &safi) == -1)
1526 		fatalx("session_rrefresh: bad afi/safi pair");
1527 
1528 	if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) {
1529 		bgp_fsm(p, EVNT_CON_FATAL);
1530 		return;
1531 	}
1532 
1533 	afi = htons(afi);
1534 	errs += ibuf_add(buf->buf, &afi, sizeof(afi));
1535 	errs += ibuf_add(buf->buf, &null8, sizeof(null8));
1536 	errs += ibuf_add(buf->buf, &safi, sizeof(safi));
1537 
1538 	if (errs) {
1539 		ibuf_free(buf->buf);
1540 		free(buf);
1541 		bgp_fsm(p, EVNT_CON_FATAL);
1542 		return;
1543 	}
1544 
1545 	if (session_sendmsg(buf, p) == -1) {
1546 		bgp_fsm(p, EVNT_CON_FATAL);
1547 		return;
1548 	}
1549 
1550 	p->stats.msg_sent_rrefresh++;
1551 }
1552 
1553 int
1554 session_dispatch_msg(struct pollfd *pfd, struct peer *p)
1555 {
1556 	ssize_t		n, rpos, av, left;
1557 	socklen_t	len;
1558 	int		error, processed = 0;
1559 	u_int16_t	msglen;
1560 	u_int8_t	msgtype;
1561 
1562 	if (p->state == STATE_CONNECT) {
1563 		if (pfd->revents & POLLOUT) {
1564 			if (pfd->revents & POLLIN) {
1565 				/* error occurred */
1566 				len = sizeof(error);
1567 				if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR,
1568 				    &error, &len) == -1 || error) {
1569 					if (error)
1570 						errno = error;
1571 					if (errno != p->lasterr) {
1572 						log_peer_warn(&p->conf,
1573 						    "socket error");
1574 						p->lasterr = errno;
1575 					}
1576 					bgp_fsm(p, EVNT_CON_OPENFAIL);
1577 					return (1);
1578 				}
1579 			}
1580 			bgp_fsm(p, EVNT_CON_OPEN);
1581 			return (1);
1582 		}
1583 		if (pfd->revents & POLLHUP) {
1584 			bgp_fsm(p, EVNT_CON_OPENFAIL);
1585 			return (1);
1586 		}
1587 		if (pfd->revents & (POLLERR|POLLNVAL)) {
1588 			bgp_fsm(p, EVNT_CON_FATAL);
1589 			return (1);
1590 		}
1591 		return (0);
1592 	}
1593 
1594 	if (pfd->revents & POLLHUP) {
1595 		bgp_fsm(p, EVNT_CON_CLOSED);
1596 		return (1);
1597 	}
1598 	if (pfd->revents & (POLLERR|POLLNVAL)) {
1599 		bgp_fsm(p, EVNT_CON_FATAL);
1600 		return (1);
1601 	}
1602 
1603 	if (pfd->revents & POLLOUT && p->wbuf.queued) {
1604 		if ((error = msgbuf_write(&p->wbuf)) < 0) {
1605 			if (error == -2)
1606 				log_peer_warnx(&p->conf, "Connection closed");
1607 			else
1608 				log_peer_warn(&p->conf, "write error");
1609 			bgp_fsm(p, EVNT_CON_FATAL);
1610 			return (1);
1611 		}
1612 		if (!(pfd->revents & POLLIN))
1613 			return (1);
1614 	}
1615 
1616 	if (p->rbuf && pfd->revents & POLLIN) {
1617 		if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos,
1618 		    sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) {
1619 			if (errno != EINTR && errno != EAGAIN) {
1620 				log_peer_warn(&p->conf, "read error");
1621 				bgp_fsm(p, EVNT_CON_FATAL);
1622 			}
1623 			return (1);
1624 		}
1625 		if (n == 0) {	/* connection closed */
1626 			bgp_fsm(p, EVNT_CON_CLOSED);
1627 			return (1);
1628 		}
1629 
1630 		rpos = 0;
1631 		av = p->rbuf->wpos + n;
1632 		p->stats.last_read = time(NULL);
1633 
1634 		/*
1635 		 * session might drop to IDLE -> buffers deallocated
1636 		 * we MUST check rbuf != NULL before use
1637 		 */
1638 		for (;;) {
1639 			if (rpos + MSGSIZE_HEADER > av)
1640 				break;
1641 			if (p->rbuf == NULL)
1642 				break;
1643 			if (parse_header(p, p->rbuf->buf + rpos, &msglen,
1644 			    &msgtype) == -1)
1645 				return (0);
1646 			if (rpos + msglen > av)
1647 				break;
1648 			p->rbuf->rptr = p->rbuf->buf + rpos;
1649 
1650 			switch (msgtype) {
1651 			case OPEN:
1652 				bgp_fsm(p, EVNT_RCVD_OPEN);
1653 				p->stats.msg_rcvd_open++;
1654 				break;
1655 			case UPDATE:
1656 				bgp_fsm(p, EVNT_RCVD_UPDATE);
1657 				p->stats.msg_rcvd_update++;
1658 				break;
1659 			case NOTIFICATION:
1660 				bgp_fsm(p, EVNT_RCVD_NOTIFICATION);
1661 				p->stats.msg_rcvd_notification++;
1662 				break;
1663 			case KEEPALIVE:
1664 				bgp_fsm(p, EVNT_RCVD_KEEPALIVE);
1665 				p->stats.msg_rcvd_keepalive++;
1666 				break;
1667 			case RREFRESH:
1668 				parse_refresh(p);
1669 				p->stats.msg_rcvd_rrefresh++;
1670 				break;
1671 			default:	/* cannot happen */
1672 				session_notification(p, ERR_HEADER,
1673 				    ERR_HDR_TYPE, &msgtype, 1);
1674 				log_warnx("received message with "
1675 				    "unknown type %u", msgtype);
1676 				bgp_fsm(p, EVNT_CON_FATAL);
1677 			}
1678 			rpos += msglen;
1679 			if (++processed > MSG_PROCESS_LIMIT)
1680 				break;
1681 		}
1682 		if (p->rbuf == NULL)
1683 			return (1);
1684 
1685 		if (rpos < av) {
1686 			left = av - rpos;
1687 			memcpy(&p->rbuf->buf, p->rbuf->buf + rpos, left);
1688 			p->rbuf->wpos = left;
1689 		} else
1690 			p->rbuf->wpos = 0;
1691 
1692 		return (1);
1693 	}
1694 	return (0);
1695 }
1696 
1697 int
1698 parse_header(struct peer *peer, u_char *data, u_int16_t *len, u_int8_t *type)
1699 {
1700 	struct mrt		*mrt;
1701 	u_char			*p;
1702 	u_int16_t		 olen;
1703 	static const u_int8_t	 marker[MSGSIZE_HEADER_MARKER] = { 0xff, 0xff,
1704 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1705 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
1706 
1707 	/* caller MUST make sure we are getting 19 bytes! */
1708 	p = data;
1709 	if (memcmp(p, marker, sizeof(marker))) {
1710 		log_peer_warnx(&peer->conf, "sync error");
1711 		session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL, 0);
1712 		bgp_fsm(peer, EVNT_CON_FATAL);
1713 		return (-1);
1714 	}
1715 	p += MSGSIZE_HEADER_MARKER;
1716 
1717 	memcpy(&olen, p, 2);
1718 	*len = ntohs(olen);
1719 	p += 2;
1720 	memcpy(type, p, 1);
1721 
1722 	if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) {
1723 		log_peer_warnx(&peer->conf,
1724 		    "received message: illegal length: %u byte", *len);
1725 		session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1726 		    &olen, sizeof(olen));
1727 		bgp_fsm(peer, EVNT_CON_FATAL);
1728 		return (-1);
1729 	}
1730 
1731 	switch (*type) {
1732 	case OPEN:
1733 		if (*len < MSGSIZE_OPEN_MIN) {
1734 			log_peer_warnx(&peer->conf,
1735 			    "received OPEN: illegal len: %u byte", *len);
1736 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1737 			    &olen, sizeof(olen));
1738 			bgp_fsm(peer, EVNT_CON_FATAL);
1739 			return (-1);
1740 		}
1741 		break;
1742 	case NOTIFICATION:
1743 		if (*len < MSGSIZE_NOTIFICATION_MIN) {
1744 			log_peer_warnx(&peer->conf,
1745 			    "received NOTIFICATION: illegal len: %u byte",
1746 			    *len);
1747 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1748 			    &olen, sizeof(olen));
1749 			bgp_fsm(peer, EVNT_CON_FATAL);
1750 			return (-1);
1751 		}
1752 		break;
1753 	case UPDATE:
1754 		if (*len < MSGSIZE_UPDATE_MIN) {
1755 			log_peer_warnx(&peer->conf,
1756 			    "received UPDATE: illegal len: %u byte", *len);
1757 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1758 			    &olen, sizeof(olen));
1759 			bgp_fsm(peer, EVNT_CON_FATAL);
1760 			return (-1);
1761 		}
1762 		break;
1763 	case KEEPALIVE:
1764 		if (*len != MSGSIZE_KEEPALIVE) {
1765 			log_peer_warnx(&peer->conf,
1766 			    "received KEEPALIVE: illegal len: %u byte", *len);
1767 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1768 			    &olen, sizeof(olen));
1769 			bgp_fsm(peer, EVNT_CON_FATAL);
1770 			return (-1);
1771 		}
1772 		break;
1773 	case RREFRESH:
1774 		if (*len != MSGSIZE_RREFRESH) {
1775 			log_peer_warnx(&peer->conf,
1776 			    "received RREFRESH: illegal len: %u byte", *len);
1777 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1778 			    &olen, sizeof(olen));
1779 			bgp_fsm(peer, EVNT_CON_FATAL);
1780 			return (-1);
1781 		}
1782 		break;
1783 	default:
1784 		log_peer_warnx(&peer->conf,
1785 		    "received msg with unknown type %u", *type);
1786 		session_notification(peer, ERR_HEADER, ERR_HDR_TYPE,
1787 		    type, 1);
1788 		bgp_fsm(peer, EVNT_CON_FATAL);
1789 		return (-1);
1790 	}
1791 	LIST_FOREACH(mrt, &mrthead, entry) {
1792 		if (!(mrt->type == MRT_ALL_IN || (*type == UPDATE &&
1793 		    mrt->type == MRT_UPDATE_IN)))
1794 			continue;
1795 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1796 		    mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
1797 		    mrt->group_id == peer->conf.groupid))
1798 			mrt_dump_bgp_msg(mrt, data, *len, peer);
1799 	}
1800 	return (0);
1801 }
1802 
1803 int
1804 parse_open(struct peer *peer)
1805 {
1806 	u_char		*p, *op_val;
1807 	u_int8_t	 version, rversion;
1808 	u_int16_t	 short_as, msglen;
1809 	u_int16_t	 holdtime, oholdtime, myholdtime;
1810 	u_int32_t	 as, bgpid;
1811 	u_int8_t	 optparamlen, plen;
1812 	u_int8_t	 op_type, op_len;
1813 
1814 	p = peer->rbuf->rptr;
1815 	p += MSGSIZE_HEADER_MARKER;
1816 	memcpy(&msglen, p, sizeof(msglen));
1817 	msglen = ntohs(msglen);
1818 
1819 	p = peer->rbuf->rptr;
1820 	p += MSGSIZE_HEADER;	/* header is already checked */
1821 
1822 	memcpy(&version, p, sizeof(version));
1823 	p += sizeof(version);
1824 
1825 	if (version != BGP_VERSION) {
1826 		log_peer_warnx(&peer->conf,
1827 		    "peer wants unrecognized version %u", version);
1828 		if (version > BGP_VERSION)
1829 			rversion = version - BGP_VERSION;
1830 		else
1831 			rversion = BGP_VERSION;
1832 		session_notification(peer, ERR_OPEN, ERR_OPEN_VERSION,
1833 		    &rversion, sizeof(rversion));
1834 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1835 		return (-1);
1836 	}
1837 
1838 	memcpy(&short_as, p, sizeof(short_as));
1839 	p += sizeof(short_as);
1840 	as = peer->short_as = ntohs(short_as);
1841 
1842 	memcpy(&oholdtime, p, sizeof(oholdtime));
1843 	p += sizeof(oholdtime);
1844 
1845 	holdtime = ntohs(oholdtime);
1846 	if (holdtime && holdtime < peer->conf.min_holdtime) {
1847 		log_peer_warnx(&peer->conf,
1848 		    "peer requests unacceptable holdtime %u", holdtime);
1849 		session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME,
1850 		    NULL, 0);
1851 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1852 		return (-1);
1853 	}
1854 
1855 	myholdtime = peer->conf.holdtime;
1856 	if (!myholdtime)
1857 		myholdtime = conf->holdtime;
1858 	if (holdtime < myholdtime)
1859 		peer->holdtime = holdtime;
1860 	else
1861 		peer->holdtime = myholdtime;
1862 
1863 	memcpy(&bgpid, p, sizeof(bgpid));
1864 	p += sizeof(bgpid);
1865 
1866 	/* check bgpid for validity - just disallow 0 */
1867 	if (ntohl(bgpid) == 0) {
1868 		log_peer_warnx(&peer->conf, "peer BGPID %lu unacceptable",
1869 		    ntohl(bgpid));
1870 		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID,
1871 		    NULL, 0);
1872 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1873 		return (-1);
1874 	}
1875 	peer->remote_bgpid = bgpid;
1876 
1877 	memcpy(&optparamlen, p, sizeof(optparamlen));
1878 	p += sizeof(optparamlen);
1879 
1880 	if (optparamlen != msglen - MSGSIZE_OPEN_MIN) {
1881 			log_peer_warnx(&peer->conf,
1882 			    "corrupt OPEN message received: length mismatch");
1883 			session_notification(peer, ERR_OPEN, 0, NULL, 0);
1884 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1885 			return (-1);
1886 	}
1887 
1888 	plen = optparamlen;
1889 	while (plen > 0) {
1890 		if (plen < 2) {
1891 			log_peer_warnx(&peer->conf,
1892 			    "corrupt OPEN message received, len wrong");
1893 			session_notification(peer, ERR_OPEN, 0, NULL, 0);
1894 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1895 			return (-1);
1896 		}
1897 		memcpy(&op_type, p, sizeof(op_type));
1898 		p += sizeof(op_type);
1899 		plen -= sizeof(op_type);
1900 		memcpy(&op_len, p, sizeof(op_len));
1901 		p += sizeof(op_len);
1902 		plen -= sizeof(op_len);
1903 		if (op_len > 0) {
1904 			if (plen < op_len) {
1905 				log_peer_warnx(&peer->conf,
1906 				    "corrupt OPEN message received, len wrong");
1907 				session_notification(peer, ERR_OPEN, 0,
1908 				    NULL, 0);
1909 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1910 				return (-1);
1911 			}
1912 			op_val = p;
1913 			p += op_len;
1914 			plen -= op_len;
1915 		} else
1916 			op_val = NULL;
1917 
1918 		switch (op_type) {
1919 		case OPT_PARAM_CAPABILITIES:		/* RFC 3392 */
1920 			if (parse_capabilities(peer, op_val, op_len,
1921 			    &as) == -1) {
1922 				session_notification(peer, ERR_OPEN, 0,
1923 				    NULL, 0);
1924 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1925 				return (-1);
1926 			}
1927 			break;
1928 		case OPT_PARAM_AUTH:			/* deprecated */
1929 		default:
1930 			/*
1931 			 * unsupported type
1932 			 * the RFCs tell us to leave the data section empty
1933 			 * and notify the peer with ERR_OPEN, ERR_OPEN_OPT.
1934 			 * How the peer should know _which_ optional parameter
1935 			 * we don't support is beyond me.
1936 			 */
1937 			log_peer_warnx(&peer->conf,
1938 			    "received OPEN message with unsupported optional "
1939 			    "parameter: type %u", op_type);
1940 			session_notification(peer, ERR_OPEN, ERR_OPEN_OPT,
1941 				NULL, 0);
1942 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1943 			timer_set(peer, Timer_IdleHold, 0);	/* no punish */
1944 			peer->IdleHoldTime /= 2;
1945 			return (-1);
1946 		}
1947 	}
1948 
1949 	/* if remote-as is zero and it's a cloned neighbor, accept any */
1950 	if (peer->conf.cloned && !peer->conf.remote_as && as != AS_TRANS) {
1951 		peer->conf.remote_as = as;
1952 		peer->conf.ebgp = (peer->conf.remote_as != conf->as);
1953 		if (!peer->conf.ebgp)
1954 			/* force enforce_as off for iBGP sessions */
1955 			peer->conf.enforce_as = ENFORCE_AS_OFF;
1956 	}
1957 
1958 	if (peer->conf.remote_as != as) {
1959 		log_peer_warnx(&peer->conf, "peer sent wrong AS %s",
1960 		    log_as(as));
1961 		session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL, 0);
1962 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1963 		return (-1);
1964 	}
1965 
1966 	if (capa_neg_calc(peer) == -1) {
1967 		log_peer_warnx(&peer->conf,
1968 		    "capabilitiy negotiation calculation failed");
1969 		session_notification(peer, ERR_OPEN, 0, NULL, 0);
1970 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1971 		return (-1);
1972 	}
1973 
1974 	return (0);
1975 }
1976 
1977 int
1978 parse_update(struct peer *peer)
1979 {
1980 	u_char		*p;
1981 	u_int16_t	 datalen;
1982 
1983 	/*
1984 	 * we pass the message verbatim to the rde.
1985 	 * in case of errors the whole session is reset with a
1986 	 * notification anyway, we only need to know the peer
1987 	 */
1988 	p = peer->rbuf->rptr;
1989 	p += MSGSIZE_HEADER_MARKER;
1990 	memcpy(&datalen, p, sizeof(datalen));
1991 	datalen = ntohs(datalen);
1992 
1993 	p = peer->rbuf->rptr;
1994 	p += MSGSIZE_HEADER;	/* header is already checked */
1995 	datalen -= MSGSIZE_HEADER;
1996 
1997 	if (imsg_compose(ibuf_rde, IMSG_UPDATE, peer->conf.id, 0, -1, p,
1998 	    datalen) == -1)
1999 		return (-1);
2000 
2001 	return (0);
2002 }
2003 
2004 int
2005 parse_refresh(struct peer *peer)
2006 {
2007 	u_char		*p;
2008 	u_int16_t	 afi;
2009 	u_int8_t	 aid, safi;
2010 
2011 	p = peer->rbuf->rptr;
2012 	p += MSGSIZE_HEADER;	/* header is already checked */
2013 
2014 	/*
2015 	 * We could check if we actually announced the capability but
2016 	 * as long as the message is correctly encoded we don't care.
2017 	 */
2018 
2019 	/* afi, 2 byte */
2020 	memcpy(&afi, p, sizeof(afi));
2021 	afi = ntohs(afi);
2022 	p += 2;
2023 	/* reserved, 1 byte */
2024 	p += 1;
2025 	/* safi, 1 byte */
2026 	memcpy(&safi, p, sizeof(safi));
2027 
2028 	/* afi/safi unchecked -	unrecognized values will be ignored anyway */
2029 	if (afi2aid(afi, safi, &aid) == -1) {
2030 		log_peer_warnx(&peer->conf, "peer sent bad refresh, "
2031 		    "invalid afi/safi pair");
2032 		return (0);
2033 	}
2034 
2035 	if (imsg_compose(ibuf_rde, IMSG_REFRESH, peer->conf.id, 0, -1, &aid,
2036 	    sizeof(aid)) == -1)
2037 		return (-1);
2038 
2039 	return (0);
2040 }
2041 
2042 int
2043 parse_notification(struct peer *peer)
2044 {
2045 	u_char		*p;
2046 	u_int16_t	 datalen;
2047 	u_int8_t	 errcode;
2048 	u_int8_t	 subcode;
2049 	u_int8_t	 capa_code;
2050 	u_int8_t	 capa_len;
2051 	u_int8_t	 i;
2052 
2053 	/* just log */
2054 	p = peer->rbuf->rptr;
2055 	p += MSGSIZE_HEADER_MARKER;
2056 	memcpy(&datalen, p, sizeof(datalen));
2057 	datalen = ntohs(datalen);
2058 
2059 	p = peer->rbuf->rptr;
2060 	p += MSGSIZE_HEADER;	/* header is already checked */
2061 	datalen -= MSGSIZE_HEADER;
2062 
2063 	memcpy(&errcode, p, sizeof(errcode));
2064 	p += sizeof(errcode);
2065 	datalen -= sizeof(errcode);
2066 
2067 	memcpy(&subcode, p, sizeof(subcode));
2068 	p += sizeof(subcode);
2069 	datalen -= sizeof(subcode);
2070 
2071 	log_notification(peer, errcode, subcode, p, datalen, "received");
2072 	peer->errcnt++;
2073 
2074 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_CAPA) {
2075 		if (datalen == 0) {	/* zebra likes to send those.. humbug */
2076 			log_peer_warnx(&peer->conf, "received \"unsupported "
2077 			    "capability\" notification without data part, "
2078 			    "disabling capability announcements altogether");
2079 			session_capa_ann_none(peer);
2080 		}
2081 
2082 		while (datalen > 0) {
2083 			if (datalen < 2) {
2084 				log_peer_warnx(&peer->conf,
2085 				    "parse_notification: "
2086 				    "expect len >= 2, len is %u", datalen);
2087 				return (-1);
2088 			}
2089 			memcpy(&capa_code, p, sizeof(capa_code));
2090 			p += sizeof(capa_code);
2091 			datalen -= sizeof(capa_code);
2092 			memcpy(&capa_len, p, sizeof(capa_len));
2093 			p += sizeof(capa_len);
2094 			datalen -= sizeof(capa_len);
2095 			if (datalen < capa_len) {
2096 				log_peer_warnx(&peer->conf,
2097 				    "parse_notification: capa_len %u exceeds "
2098 				    "remaining msg length %u", capa_len,
2099 				    datalen);
2100 				return (-1);
2101 			}
2102 			p += capa_len;
2103 			datalen -= capa_len;
2104 			switch (capa_code) {
2105 			case CAPA_MP:
2106 				for (i = 0; i < AID_MAX; i++)
2107 					peer->capa.ann.mp[i] = 0;
2108 				log_peer_warnx(&peer->conf,
2109 				    "disabling multiprotocol capability");
2110 				break;
2111 			case CAPA_REFRESH:
2112 				peer->capa.ann.refresh = 0;
2113 				log_peer_warnx(&peer->conf,
2114 				    "disabling route refresh capability");
2115 				break;
2116 			case CAPA_RESTART:
2117 				peer->capa.ann.restart = 0;
2118 				log_peer_warnx(&peer->conf,
2119 				    "disabling restart capability");
2120 				break;
2121 			case CAPA_AS4BYTE:
2122 				peer->capa.ann.as4byte = 0;
2123 				log_peer_warnx(&peer->conf,
2124 				    "disabling 4-byte AS num capability");
2125 				break;
2126 			default:	/* should not happen... */
2127 				log_peer_warnx(&peer->conf, "received "
2128 				    "\"unsupported capability\" notification "
2129 				    "for unknown capability %u, disabling "
2130 				    "capability announcements altogether",
2131 				    capa_code);
2132 				session_capa_ann_none(peer);
2133 				break;
2134 			}
2135 		}
2136 
2137 		return (1);
2138 	}
2139 
2140 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_OPT) {
2141 		session_capa_ann_none(peer);
2142 		return (1);
2143 	}
2144 
2145 	return (0);
2146 }
2147 
2148 int
2149 parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as)
2150 {
2151 	u_char		*capa_val;
2152 	u_int32_t	 remote_as;
2153 	u_int16_t	 len;
2154 	u_int16_t	 afi;
2155 	u_int8_t	 safi;
2156 	u_int8_t	 aid;
2157 	u_int8_t	 capa_code;
2158 	u_int8_t	 capa_len;
2159 
2160 	len = dlen;
2161 	while (len > 0) {
2162 		if (len < 2) {
2163 			log_peer_warnx(&peer->conf, "parse_capabilities: "
2164 			    "expect len >= 2, len is %u", len);
2165 			return (-1);
2166 		}
2167 		memcpy(&capa_code, d, sizeof(capa_code));
2168 		d += sizeof(capa_code);
2169 		len -= sizeof(capa_code);
2170 		memcpy(&capa_len, d, sizeof(capa_len));
2171 		d += sizeof(capa_len);
2172 		len -= sizeof(capa_len);
2173 		if (capa_len > 0) {
2174 			if (len < capa_len) {
2175 				log_peer_warnx(&peer->conf,
2176 				    "parse_capabilities: "
2177 				    "len %u smaller than capa_len %u",
2178 				    len, capa_len);
2179 				return (-1);
2180 			}
2181 			capa_val = d;
2182 			d += capa_len;
2183 			len -= capa_len;
2184 		} else
2185 			capa_val = NULL;
2186 
2187 		switch (capa_code) {
2188 		case CAPA_MP:			/* RFC 4760 */
2189 			if (capa_len != 4) {
2190 				log_peer_warnx(&peer->conf,
2191 				    "parse_capabilities: "
2192 				    "expect len 4, len is %u", capa_len);
2193 				return (-1);
2194 			}
2195 			memcpy(&afi, capa_val, sizeof(afi));
2196 			afi = ntohs(afi);
2197 			memcpy(&safi, capa_val + 3, sizeof(safi));
2198 			if (afi2aid(afi, safi, &aid) == -1) {
2199 				log_peer_warnx(&peer->conf,
2200 				    "parse_capabilities: AFI %u, "
2201 				    "safi %u unknown", afi, safi);
2202 				break;
2203 			}
2204 			peer->capa.peer.mp[aid] = 1;
2205 			break;
2206 		case CAPA_REFRESH:
2207 			peer->capa.peer.refresh = 1;
2208 			break;
2209 		case CAPA_RESTART:
2210 			peer->capa.peer.restart = 1;
2211 			/* we don't care about the further restart capas yet */
2212 			break;
2213 		case CAPA_AS4BYTE:
2214 			if (capa_len != 4) {
2215 				log_peer_warnx(&peer->conf,
2216 				    "parse_capabilities: "
2217 				    "expect len 4, len is %u", capa_len);
2218 				return (-1);
2219 			}
2220 			memcpy(&remote_as, capa_val, sizeof(remote_as));
2221 			*as = ntohl(remote_as);
2222 			peer->capa.peer.as4byte = 1;
2223 			break;
2224 		default:
2225 			break;
2226 		}
2227 	}
2228 
2229 	return (0);
2230 }
2231 
2232 int
2233 capa_neg_calc(struct peer *p)
2234 {
2235 	u_int8_t	i, hasmp = 0;
2236 
2237 	/* refresh: does not realy matter here, use peer setting */
2238 	p->capa.neg.refresh = p->capa.peer.refresh;
2239 
2240 	/* as4byte: both side must announce capability */
2241 	if (p->capa.ann.as4byte && p->capa.peer.as4byte)
2242 		p->capa.neg.as4byte = 1;
2243 	else
2244 		p->capa.neg.as4byte = 0;
2245 
2246 	/* MP: both side must announce capability */
2247 	for (i = 0; i < AID_MAX; i++) {
2248 		if (p->capa.ann.mp[i] && p->capa.peer.mp[i]) {
2249 			p->capa.neg.mp[i] = 1;
2250 			hasmp = 1;
2251 		} else
2252 			p->capa.neg.mp[i] = 0;
2253 	}
2254 	/* if no MP capability present for default IPv4 unicast mode */
2255 	if (!hasmp)
2256 		p->capa.neg.mp[AID_INET] = 1;
2257 
2258 	p->capa.neg.restart = p->capa.peer.restart;
2259 
2260 	return (0);
2261 }
2262 
2263 void
2264 session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt)
2265 {
2266 	struct imsg		 imsg;
2267 	struct mrt		 xmrt;
2268 	struct mrt		*mrt;
2269 	struct peer_config	*pconf;
2270 	struct peer		*p, *next;
2271 	struct listen_addr	*la, *nla;
2272 	struct kif		*kif;
2273 	u_char			*data;
2274 	enum reconf_action	 reconf;
2275 	int			 n, depend_ok, restricted;
2276 	u_int8_t		 errcode, subcode;
2277 
2278 	if ((n = imsg_read(ibuf)) == -1)
2279 		fatal("session_dispatch_imsg: imsg_read error");
2280 
2281 	if (n == 0)	/* connection closed */
2282 		fatalx("session_dispatch_imsg: pipe closed");
2283 
2284 	for (;;) {
2285 		if ((n = imsg_get(ibuf, &imsg)) == -1)
2286 			fatal("session_dispatch_imsg: imsg_get error");
2287 
2288 		if (n == 0)
2289 			break;
2290 
2291 		switch (imsg.hdr.type) {
2292 		case IMSG_RECONF_CONF:
2293 			if (idx != PFD_PIPE_MAIN)
2294 				fatalx("reconf request not from parent");
2295 			if ((nconf = malloc(sizeof(struct bgpd_config))) ==
2296 			    NULL)
2297 				fatal(NULL);
2298 			memcpy(nconf, imsg.data, sizeof(struct bgpd_config));
2299 			if ((nconf->listen_addrs = calloc(1,
2300 			    sizeof(struct listen_addrs))) == NULL)
2301 				fatal(NULL);
2302 			TAILQ_INIT(nconf->listen_addrs);
2303 			npeers = NULL;
2304 			init_conf(nconf);
2305 			pending_reconf = 1;
2306 			break;
2307 		case IMSG_RECONF_PEER:
2308 			if (idx != PFD_PIPE_MAIN)
2309 				fatalx("reconf request not from parent");
2310 			pconf = imsg.data;
2311 			p = getpeerbyaddr(&pconf->remote_addr);
2312 			if (p == NULL) {
2313 				if ((p = calloc(1, sizeof(struct peer))) ==
2314 				    NULL)
2315 					fatal("new_peer");
2316 				p->state = p->prev_state = STATE_NONE;
2317 				p->next = npeers;
2318 				npeers = p;
2319 				reconf = RECONF_REINIT;
2320 			} else
2321 				reconf = RECONF_KEEP;
2322 
2323 			memcpy(&p->conf, pconf, sizeof(struct peer_config));
2324 			p->conf.reconf_action = reconf;
2325 			break;
2326 		case IMSG_RECONF_LISTENER:
2327 			if (idx != PFD_PIPE_MAIN)
2328 				fatalx("reconf request not from parent");
2329 			if (nconf == NULL)
2330 				fatalx("IMSG_RECONF_LISTENER but no config");
2331 			nla = imsg.data;
2332 			TAILQ_FOREACH(la, conf->listen_addrs, entry)
2333 				if (!la_cmp(la, nla))
2334 					break;
2335 
2336 			if (la == NULL) {
2337 				if (nla->reconf != RECONF_REINIT)
2338 					fatalx("king bula sez: "
2339 					    "expected REINIT");
2340 
2341 				if ((nla->fd = imsg.fd) == -1)
2342 					log_warnx("expected to receive fd for "
2343 					    "%s but didn't receive any",
2344 					    log_sockaddr((struct sockaddr *)
2345 					    &nla->sa));
2346 
2347 				la = calloc(1, sizeof(struct listen_addr));
2348 				if (la == NULL)
2349 					fatal(NULL);
2350 				memcpy(&la->sa, &nla->sa, sizeof(la->sa));
2351 				la->flags = nla->flags;
2352 				la->fd = nla->fd;
2353 				la->reconf = RECONF_REINIT;
2354 				TAILQ_INSERT_TAIL(nconf->listen_addrs, la,
2355 				    entry);
2356 			} else {
2357 				if (nla->reconf != RECONF_KEEP)
2358 					fatalx("king bula sez: expected KEEP");
2359 				la->reconf = RECONF_KEEP;
2360 			}
2361 
2362 			break;
2363 		case IMSG_RECONF_CTRL:
2364 			if (idx != PFD_PIPE_MAIN)
2365 				fatalx("reconf request not from parent");
2366 			if (imsg.hdr.len != IMSG_HEADER_SIZE +
2367 			    sizeof(restricted))
2368 				fatalx("IFINFO imsg with wrong len");
2369 			memcpy(&restricted, imsg.data, sizeof(restricted));
2370 			if (imsg.fd == -1) {
2371 				log_warnx("expected to receive fd for control "
2372 				    "socket but didn't receive any");
2373 				break;
2374 			}
2375 			if (restricted) {
2376 				control_shutdown(rcsock);
2377 				rcsock = imsg.fd;
2378 				control_listen(rcsock);
2379 			} else {
2380 				control_shutdown(csock);
2381 				csock = imsg.fd;
2382 				control_listen(csock);
2383 			}
2384 			break;
2385 		case IMSG_RECONF_DONE:
2386 			if (idx != PFD_PIPE_MAIN)
2387 				fatalx("reconf request not from parent");
2388 			if (nconf == NULL)
2389 				fatalx("got IMSG_RECONF_DONE but no config");
2390 			conf->flags = nconf->flags;
2391 			conf->log = nconf->log;
2392 			conf->bgpid = nconf->bgpid;
2393 			conf->clusterid = nconf->clusterid;
2394 			conf->as = nconf->as;
2395 			conf->short_as = nconf->short_as;
2396 			conf->holdtime = nconf->holdtime;
2397 			conf->min_holdtime = nconf->min_holdtime;
2398 			conf->connectretry = nconf->connectretry;
2399 
2400 			/* add new peers */
2401 			for (p = npeers; p != NULL; p = next) {
2402 				next = p->next;
2403 				p->next = peers;
2404 				peers = p;
2405 			}
2406 			/* find ones that need attention */
2407 			for (p = peers; p != NULL; p = p->next) {
2408 				/* needs to be deleted? */
2409 				if (p->conf.reconf_action == RECONF_NONE &&
2410 				    !p->conf.cloned)
2411 					p->conf.reconf_action = RECONF_DELETE;
2412 				/* had demotion, is demoted, demote removed? */
2413 				if (p->demoted && !p->conf.demote_group[0])
2414 						session_demote(p, -1);
2415 			}
2416 
2417 			/* delete old listeners */
2418 			for (la = TAILQ_FIRST(conf->listen_addrs); la != NULL;
2419 			    la = nla) {
2420 				nla = TAILQ_NEXT(la, entry);
2421 				if (la->reconf == RECONF_NONE) {
2422 					log_info("not listening on %s any more",
2423 					    log_sockaddr(
2424 					    (struct sockaddr *)&la->sa));
2425 					TAILQ_REMOVE(conf->listen_addrs, la,
2426 					    entry);
2427 					close(la->fd);
2428 					free(la);
2429 				}
2430 			}
2431 
2432 			/* add new listeners */
2433 			while ((la = TAILQ_FIRST(nconf->listen_addrs)) !=
2434 			    NULL) {
2435 				TAILQ_REMOVE(nconf->listen_addrs, la, entry);
2436 				TAILQ_INSERT_TAIL(conf->listen_addrs, la,
2437 				    entry);
2438 			}
2439 
2440 			setup_listeners(listener_cnt);
2441 			free(nconf->listen_addrs);
2442 			free(nconf);
2443 			nconf = NULL;
2444 			pending_reconf = 0;
2445 			log_info("SE reconfigured");
2446 			break;
2447 		case IMSG_IFINFO:
2448 			if (idx != PFD_PIPE_MAIN)
2449 				fatalx("IFINFO message not from parent");
2450 			if (imsg.hdr.len != IMSG_HEADER_SIZE +
2451 			    sizeof(struct kif))
2452 				fatalx("IFINFO imsg with wrong len");
2453 			kif = imsg.data;
2454 			depend_ok = (kif->flags & IFF_UP) &&
2455 			    LINK_STATE_IS_UP(kif->link_state);
2456 
2457 			for (p = peers; p != NULL; p = p->next)
2458 				if (!strcmp(p->conf.if_depend, kif->ifname)) {
2459 					if (depend_ok && !p->depend_ok) {
2460 						p->depend_ok = depend_ok;
2461 						bgp_fsm(p, EVNT_START);
2462 					} else if (!depend_ok && p->depend_ok) {
2463 						p->depend_ok = depend_ok;
2464 						session_stop(p,
2465 						    ERR_CEASE_OTHER_CHANGE);
2466 					}
2467 				}
2468 			break;
2469 		case IMSG_MRT_OPEN:
2470 		case IMSG_MRT_REOPEN:
2471 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2472 			    sizeof(struct mrt)) {
2473 				log_warnx("wrong imsg len");
2474 				break;
2475 			}
2476 
2477 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
2478 			if ((xmrt.wbuf.fd = imsg.fd) == -1)
2479 				log_warnx("expected to receive fd for mrt dump "
2480 				    "but didn't receive any");
2481 
2482 			mrt = mrt_get(&mrthead, &xmrt);
2483 			if (mrt == NULL) {
2484 				/* new dump */
2485 				mrt = calloc(1, sizeof(struct mrt));
2486 				if (mrt == NULL)
2487 					fatal("session_dispatch_imsg");
2488 				memcpy(mrt, &xmrt, sizeof(struct mrt));
2489 				TAILQ_INIT(&mrt->wbuf.bufs);
2490 				LIST_INSERT_HEAD(&mrthead, mrt, entry);
2491 			} else {
2492 				/* old dump reopened */
2493 				close(mrt->wbuf.fd);
2494 				mrt->wbuf.fd = xmrt.wbuf.fd;
2495 			}
2496 			break;
2497 		case IMSG_MRT_CLOSE:
2498 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2499 			    sizeof(struct mrt)) {
2500 				log_warnx("wrong imsg len");
2501 				break;
2502 			}
2503 
2504 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
2505 			mrt = mrt_get(&mrthead, &xmrt);
2506 			if (mrt != NULL) {
2507 				mrt_clean(mrt);
2508 				LIST_REMOVE(mrt, entry);
2509 				free(mrt);
2510 			}
2511 			break;
2512 		case IMSG_CTL_KROUTE:
2513 		case IMSG_CTL_KROUTE_ADDR:
2514 		case IMSG_CTL_SHOW_NEXTHOP:
2515 		case IMSG_CTL_SHOW_INTERFACE:
2516 		case IMSG_CTL_SHOW_FIB_TABLES:
2517 			if (idx != PFD_PIPE_MAIN)
2518 				fatalx("ctl kroute request not from parent");
2519 			control_imsg_relay(&imsg);
2520 			break;
2521 		case IMSG_CTL_SHOW_RIB:
2522 		case IMSG_CTL_SHOW_RIB_PREFIX:
2523 		case IMSG_CTL_SHOW_RIB_ATTR:
2524 		case IMSG_CTL_SHOW_RIB_MEM:
2525 		case IMSG_CTL_SHOW_NETWORK:
2526 		case IMSG_CTL_SHOW_NEIGHBOR:
2527 			if (idx != PFD_PIPE_ROUTE_CTL)
2528 				fatalx("ctl rib request not from RDE");
2529 			control_imsg_relay(&imsg);
2530 			break;
2531 		case IMSG_CTL_END:
2532 		case IMSG_CTL_RESULT:
2533 			control_imsg_relay(&imsg);
2534 			break;
2535 		case IMSG_UPDATE:
2536 			if (idx != PFD_PIPE_ROUTE)
2537 				fatalx("update request not from RDE");
2538 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2539 			    MAX_PKTSIZE - MSGSIZE_HEADER ||
2540 			    imsg.hdr.len < IMSG_HEADER_SIZE +
2541 			    MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER)
2542 				log_warnx("RDE sent invalid update");
2543 			else
2544 				session_update(imsg.hdr.peerid, imsg.data,
2545 				    imsg.hdr.len - IMSG_HEADER_SIZE);
2546 			break;
2547 		case IMSG_UPDATE_ERR:
2548 			if (idx != PFD_PIPE_ROUTE)
2549 				fatalx("update request not from RDE");
2550 			if (imsg.hdr.len < IMSG_HEADER_SIZE + 2) {
2551 				log_warnx("RDE sent invalid notification");
2552 				break;
2553 			}
2554 			if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) {
2555 				log_warnx("no such peer: id=%u",
2556 				    imsg.hdr.peerid);
2557 				break;
2558 			}
2559 			data = imsg.data;
2560 			errcode = *data++;
2561 			subcode = *data++;
2562 
2563 			if (imsg.hdr.len == IMSG_HEADER_SIZE + 2)
2564 				data = NULL;
2565 
2566 			session_notification(p, errcode, subcode,
2567 			    data, imsg.hdr.len - IMSG_HEADER_SIZE - 2);
2568 			switch (errcode) {
2569 			case ERR_CEASE:
2570 				switch (subcode) {
2571 				case ERR_CEASE_MAX_PREFIX:
2572 					bgp_fsm(p, EVNT_STOP);
2573 					if (p->conf.max_prefix_restart)
2574 						timer_set(p, Timer_IdleHold, 60 *
2575 						    p->conf.max_prefix_restart);
2576 					break;
2577 				default:
2578 					bgp_fsm(p, EVNT_CON_FATAL);
2579 					break;
2580 				}
2581 				break;
2582 			default:
2583 				bgp_fsm(p, EVNT_CON_FATAL);
2584 				break;
2585 			}
2586 			break;
2587 		default:
2588 			break;
2589 		}
2590 		imsg_free(&imsg);
2591 	}
2592 }
2593 
2594 int
2595 la_cmp(struct listen_addr *a, struct listen_addr *b)
2596 {
2597 	struct sockaddr_in	*in_a, *in_b;
2598 	struct sockaddr_in6	*in6_a, *in6_b;
2599 
2600 	if (a->sa.ss_family != b->sa.ss_family)
2601 		return (1);
2602 
2603 	switch (a->sa.ss_family) {
2604 	case AF_INET:
2605 		in_a = (struct sockaddr_in *)&a->sa;
2606 		in_b = (struct sockaddr_in *)&b->sa;
2607 		if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr)
2608 			return (1);
2609 		if (in_a->sin_port != in_b->sin_port)
2610 			return (1);
2611 		break;
2612 	case AF_INET6:
2613 		in6_a = (struct sockaddr_in6 *)&a->sa;
2614 		in6_b = (struct sockaddr_in6 *)&b->sa;
2615 		if (bcmp(&in6_a->sin6_addr, &in6_b->sin6_addr,
2616 		    sizeof(struct in6_addr)))
2617 			return (1);
2618 		if (in6_a->sin6_port != in6_b->sin6_port)
2619 			return (1);
2620 		break;
2621 	default:
2622 		fatal("king bula sez: unknown address family");
2623 		/* NOTREACHED */
2624 	}
2625 
2626 	return (0);
2627 }
2628 
2629 struct peer *
2630 getpeerbyaddr(struct bgpd_addr *addr)
2631 {
2632 	struct peer *p;
2633 
2634 	/* we might want a more effective way to find peers by IP */
2635 	for (p = peers; p != NULL &&
2636 	    memcmp(&p->conf.remote_addr, addr, sizeof(p->conf.remote_addr));
2637 	    p = p->next)
2638 		;	/* nothing */
2639 
2640 	return (p);
2641 }
2642 
2643 struct peer *
2644 getpeerbydesc(const char *descr)
2645 {
2646 	struct peer	*p, *res = NULL;
2647 	int		 match = 0;
2648 
2649 	for (p = peers; p != NULL; p = p->next)
2650 		if (!strcmp(p->conf.descr, descr)) {
2651 			res = p;
2652 			match++;
2653 		}
2654 
2655 	if (match > 1)
2656 		log_info("neighbor description \"%s\" not unique, request "
2657 		    "aborted", descr);
2658 
2659 	if (match == 1)
2660 		return (res);
2661 	else
2662 		return (NULL);
2663 }
2664 
2665 struct peer *
2666 getpeerbyip(struct sockaddr *ip)
2667 {
2668 	struct bgpd_addr addr;
2669 	struct peer	*p, *newpeer, *loose = NULL;
2670 	u_int32_t	 id;
2671 
2672 	sa2addr(ip, &addr);
2673 
2674 	/* we might want a more effective way to find peers by IP */
2675 	for (p = peers; p != NULL; p = p->next)
2676 		if (!p->conf.template &&
2677 		    !memcmp(&addr, &p->conf.remote_addr, sizeof(addr)))
2678 			return (p);
2679 
2680 	/* try template matching */
2681 	for (p = peers; p != NULL; p = p->next)
2682 		if (p->conf.template &&
2683 		    p->conf.remote_addr.aid == addr.aid &&
2684 		    session_match_mask(p, &addr))
2685 			if (loose == NULL || loose->conf.remote_masklen <
2686 			    p->conf.remote_masklen)
2687 				loose = p;
2688 
2689 	if (loose != NULL) {
2690 		/* clone */
2691 		if ((newpeer = malloc(sizeof(struct peer))) == NULL)
2692 			fatal(NULL);
2693 		memcpy(newpeer, loose, sizeof(struct peer));
2694 		for (id = UINT_MAX; id > UINT_MAX / 2; id--) {
2695 			for (p = peers; p != NULL && p->conf.id != id;
2696 			    p = p->next)
2697 				;	/* nothing */
2698 			if (p == NULL) {	/* we found a free id */
2699 				newpeer->conf.id = id;
2700 				break;
2701 			}
2702 		}
2703 		sa2addr(ip, &newpeer->conf.remote_addr);
2704 		switch (ip->sa_family) {
2705 		case AF_INET:
2706 			newpeer->conf.remote_masklen = 32;
2707 			break;
2708 		case AF_INET6:
2709 			newpeer->conf.remote_masklen = 128;
2710 			break;
2711 		}
2712 		newpeer->conf.template = 0;
2713 		newpeer->conf.cloned = 1;
2714 		newpeer->state = newpeer->prev_state = STATE_NONE;
2715 		newpeer->conf.reconf_action = RECONF_KEEP;
2716 		newpeer->rbuf = NULL;
2717 		init_peer(newpeer);
2718 		bgp_fsm(newpeer, EVNT_START);
2719 		newpeer->next = peers;
2720 		peers = newpeer;
2721 		return (newpeer);
2722 	}
2723 
2724 	return (NULL);
2725 }
2726 
2727 int
2728 session_match_mask(struct peer *p, struct bgpd_addr *a)
2729 {
2730 	in_addr_t	 v4mask;
2731 	struct in6_addr	 masked;
2732 
2733 	switch (p->conf.remote_addr.aid) {
2734 	case AID_INET:
2735 		v4mask = htonl(prefixlen2mask(p->conf.remote_masklen));
2736 		if (p->conf.remote_addr.v4.s_addr == (a->v4.s_addr & v4mask))
2737 			return (1);
2738 		return (0);
2739 	case AID_INET6:
2740 		inet6applymask(&masked, &a->v6, p->conf.remote_masklen);
2741 
2742 		if (!memcmp(&masked, &p->conf.remote_addr.v6, sizeof(masked)))
2743 			return (1);
2744 		return (0);
2745 	}
2746 	return (0);
2747 }
2748 
2749 struct peer *
2750 getpeerbyid(u_int32_t peerid)
2751 {
2752 	struct peer *p;
2753 
2754 	/* we might want a more effective way to find peers by IP */
2755 	for (p = peers; p != NULL &&
2756 	    p->conf.id != peerid; p = p->next)
2757 		;	/* nothing */
2758 
2759 	return (p);
2760 }
2761 
2762 void
2763 session_down(struct peer *peer)
2764 {
2765 	bzero(&peer->capa.neg, sizeof(peer->capa.neg));
2766 	peer->stats.last_updown = time(NULL);
2767 	if (imsg_compose(ibuf_rde, IMSG_SESSION_DOWN, peer->conf.id, 0, -1,
2768 	    NULL, 0) == -1)
2769 		fatalx("imsg_compose error");
2770 }
2771 
2772 void
2773 session_up(struct peer *p)
2774 {
2775 	struct session_up	 sup;
2776 
2777 	if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, p->conf.id, 0, -1,
2778 	    &p->conf, sizeof(p->conf)) == -1)
2779 		fatalx("imsg_compose error");
2780 
2781 	sa2addr((struct sockaddr *)&p->sa_local, &sup.local_addr);
2782 	sa2addr((struct sockaddr *)&p->sa_remote, &sup.remote_addr);
2783 
2784 	sup.remote_bgpid = p->remote_bgpid;
2785 	sup.short_as = p->short_as;
2786 	memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa));
2787 	p->stats.last_updown = time(NULL);
2788 	if (imsg_compose(ibuf_rde, IMSG_SESSION_UP, p->conf.id, 0, -1,
2789 	    &sup, sizeof(sup)) == -1)
2790 		fatalx("imsg_compose error");
2791 }
2792 
2793 int
2794 imsg_compose_parent(int type, u_int32_t peerid, pid_t pid, void *data,
2795     u_int16_t datalen)
2796 {
2797 	return (imsg_compose(ibuf_main, type, peerid, pid, -1, data, datalen));
2798 }
2799 
2800 int
2801 imsg_compose_rde(int type, pid_t pid, void *data, u_int16_t datalen)
2802 {
2803 	return (imsg_compose(ibuf_rde, type, 0, pid, -1, data, datalen));
2804 }
2805 
2806 void
2807 session_demote(struct peer *p, int level)
2808 {
2809 	struct demote_msg	msg;
2810 
2811 	strlcpy(msg.demote_group, p->conf.demote_group,
2812 	    sizeof(msg.demote_group));
2813 	msg.level = level;
2814 	if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1,
2815 	    &msg, sizeof(msg)) == -1)
2816 		fatalx("imsg_compose error");
2817 
2818 	p->demoted += level;
2819 }
2820 
2821 void
2822 session_stop(struct peer *peer, u_int8_t subcode)
2823 {
2824 	switch (peer->state) {
2825 	case STATE_OPENSENT:
2826 	case STATE_OPENCONFIRM:
2827 	case STATE_ESTABLISHED:
2828 		session_notification(peer, ERR_CEASE, subcode, NULL, 0);
2829 		break;
2830 	default:
2831 		/* session not open, no need to send notification */
2832 		break;
2833 	}
2834 	bgp_fsm(peer, EVNT_STOP);
2835 }
2836