xref: /openbsd-src/usr.sbin/bgpd/rde.c (revision d13be5d47e4149db2549a9828e244d59dbc43f15)
1 /*	$OpenBSD: rde.c,v 1.308 2011/07/09 02:51:18 henning Exp $ */
2 
3 /*
4  * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 #include <sys/socket.h>
21 #include <sys/time.h>
22 #include <sys/resource.h>
23 
24 #include <errno.h>
25 #include <ifaddrs.h>
26 #include <pwd.h>
27 #include <poll.h>
28 #include <signal.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33 
34 #include "bgpd.h"
35 #include "mrt.h"
36 #include "rde.h"
37 #include "session.h"
38 
39 #define PFD_PIPE_MAIN		0
40 #define PFD_PIPE_SESSION	1
41 #define PFD_PIPE_SESSION_CTL	2
42 #define PFD_PIPE_COUNT		3
43 
44 void		 rde_sighdlr(int);
45 void		 rde_dispatch_imsg_session(struct imsgbuf *);
46 void		 rde_dispatch_imsg_parent(struct imsgbuf *);
47 int		 rde_update_dispatch(struct imsg *);
48 void		 rde_update_update(struct rde_peer *, struct rde_aspath *,
49 		     struct bgpd_addr *, u_int8_t);
50 void		 rde_update_withdraw(struct rde_peer *, struct bgpd_addr *,
51 		     u_int8_t);
52 int		 rde_attr_parse(u_char *, u_int16_t, struct rde_peer *,
53 		     struct rde_aspath *, struct mpattr *);
54 u_int8_t	 rde_attr_missing(struct rde_aspath *, int, u_int16_t);
55 int		 rde_get_mp_nexthop(u_char *, u_int16_t, u_int8_t,
56 		     struct rde_aspath *);
57 int		 rde_update_extract_prefix(u_char *, u_int16_t, void *,
58 		     u_int8_t, u_int8_t);
59 int		 rde_update_get_prefix(u_char *, u_int16_t, struct bgpd_addr *,
60 		     u_int8_t *);
61 int		 rde_update_get_prefix6(u_char *, u_int16_t, struct bgpd_addr *,
62 		     u_int8_t *);
63 int		 rde_update_get_vpn4(u_char *, u_int16_t, struct bgpd_addr *,
64 		     u_int8_t *);
65 void		 rde_update_err(struct rde_peer *, u_int8_t , u_int8_t,
66 		     void *, u_int16_t);
67 void		 rde_update_log(const char *, u_int16_t,
68 		     const struct rde_peer *, const struct bgpd_addr *,
69 		     const struct bgpd_addr *, u_int8_t);
70 void		 rde_as4byte_fixup(struct rde_peer *, struct rde_aspath *);
71 void		 rde_reflector(struct rde_peer *, struct rde_aspath *);
72 
73 void		 rde_dump_rib_as(struct prefix *, struct rde_aspath *,pid_t,
74 		     int);
75 void		 rde_dump_filter(struct prefix *,
76 		     struct ctl_show_rib_request *);
77 void		 rde_dump_filterout(struct rde_peer *, struct prefix *,
78 		     struct ctl_show_rib_request *);
79 void		 rde_dump_upcall(struct rib_entry *, void *);
80 void		 rde_dump_prefix_upcall(struct rib_entry *, void *);
81 void		 rde_dump_ctx_new(struct ctl_show_rib_request *, pid_t,
82 		     enum imsg_type);
83 void		 rde_dump_mrt_new(struct mrt *, pid_t, int);
84 void		 rde_dump_done(void *);
85 
86 int		 rde_rdomain_import(struct rde_aspath *, struct rdomain *);
87 void		 rde_up_dump_upcall(struct rib_entry *, void *);
88 void		 rde_softreconfig_out(struct rib_entry *, void *);
89 void		 rde_softreconfig_in(struct rib_entry *, void *);
90 void		 rde_softreconfig_load(struct rib_entry *, void *);
91 void		 rde_softreconfig_load_peer(struct rib_entry *, void *);
92 void		 rde_softreconfig_unload_peer(struct rib_entry *, void *);
93 void		 rde_update_queue_runner(void);
94 void		 rde_update6_queue_runner(u_int8_t);
95 
96 void		 peer_init(u_int32_t);
97 void		 peer_shutdown(void);
98 void		 peer_localaddrs(struct rde_peer *, struct bgpd_addr *);
99 struct rde_peer	*peer_add(u_int32_t, struct peer_config *);
100 struct rde_peer	*peer_get(u_int32_t);
101 void		 peer_up(u_int32_t, struct session_up *);
102 void		 peer_down(u_int32_t);
103 void		 peer_dump(u_int32_t, u_int8_t);
104 void		 peer_send_eor(struct rde_peer *, u_int8_t);
105 
106 void		 network_add(struct network_config *, int);
107 void		 network_delete(struct network_config *, int);
108 void		 network_dump_upcall(struct rib_entry *, void *);
109 
110 void		 rde_shutdown(void);
111 int		 sa_cmp(struct bgpd_addr *, struct sockaddr *);
112 
/*
 * Global RDE state.
 */
volatile sig_atomic_t	 rde_quit = 0;	/* set to 1 by rde_sighdlr() on SIGINT/SIGTERM */
struct bgpd_config	*conf, *nconf;	/* running config and staged reload config */
time_t			 reloadtime;	/* timestamp of last IMSG_RECONF_CONF */
struct rde_peer_head	 peerlist;	/* list of all peers known to the RDE */
struct rde_peer		*peerself;	/* peer entry for this bgpd instance, synced with conf */
struct filter_head	*rules_l, *newrules;	/* active and staged filter rules */
struct rdomain_head	*rdomains_l, *newdomains;	/* active and staged rdomains */
struct imsgbuf		*ibuf_se;	/* pipe to the session engine */
struct imsgbuf		*ibuf_se_ctl;	/* control pipe to the session engine */
struct imsgbuf		*ibuf_main;	/* pipe to the parent (main) process */
struct rde_memstats	 rdemem;	/* memory usage statistics (IMSG_CTL_SHOW_RIB_MEM) */

/* Context of an in-progress control-socket RIB dump request. */
struct rde_dump_ctx {
	struct rib_context		ribctx;
	struct ctl_show_rib_request	req;
	sa_family_t			af;
};

/* Context of an in-progress MRT table dump. */
struct rde_mrt_ctx {
	struct mrt		mrt;
	struct rib_context	ribctx;
	LIST_ENTRY(rde_mrt_ctx)	entry;
};

LIST_HEAD(, rde_mrt_ctx) rde_mrts = LIST_HEAD_INITIALIZER(rde_mrts);
u_int rde_mrt_cnt;	/* number of entries on the rde_mrts list */
139 
140 void
141 rde_sighdlr(int sig)
142 {
143 	switch (sig) {
144 	case SIGINT:
145 	case SIGTERM:
146 		rde_quit = 1;
147 		break;
148 	}
149 }
150 
/*
 * Bucket counts passed to the various hash-table init functions in
 * rde_main() (peer_init, path/aspath_init, attr_init, nexthop_init).
 */
u_int32_t	peerhashsize = 64;
u_int32_t	pathhashsize = 1024;
u_int32_t	attrhashsize = 512;
u_int32_t	nexthophashsize = 64;
155 
/*
 * Fork and run the route decision engine process.
 *
 * In the parent, returns the child's pid immediately.  In the child this
 * never returns: it drops privileges into a chroot, sets up the imsg
 * pipes to the parent and the session engine, initializes the RIB data
 * structures and then runs the poll() event loop until rde_quit is set,
 * finally calling _exit(0).
 */
pid_t
rde_main(int pipe_m2r[2], int pipe_s2r[2], int pipe_m2s[2], int pipe_s2rctl[2],
    int debug)
{
	pid_t			 pid;
	struct passwd		*pw;
	struct pollfd		*pfd = NULL;
	struct rde_mrt_ctx	*mctx, *xmctx;
	void			*newp;
	u_int			 pfd_elms = 0, i, j;
	int			 timeout;
	u_int8_t		 aid;

	switch (pid = fork()) {
	case -1:
		fatal("cannot fork");
	case 0:
		break;		/* child: fall through and become the RDE */
	default:
		return (pid);	/* parent: hand back the child's pid */
	}

	/* chroot to the bgpd user's home directory and drop privileges */
	if ((pw = getpwnam(BGPD_USER)) == NULL)
		fatal("getpwnam");

	if (chroot(pw->pw_dir) == -1)
		fatal("chroot");
	if (chdir("/") == -1)
		fatal("chdir(\"/\")");

	setproctitle("route decision engine");
	bgpd_process = PROC_RDE;

	if (setgroups(1, &pw->pw_gid) ||
	    setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) ||
	    setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
		fatal("can't drop privileges");

	signal(SIGTERM, rde_sighdlr);
	signal(SIGINT, rde_sighdlr);
	signal(SIGPIPE, SIG_IGN);
	signal(SIGHUP, SIG_IGN);
	signal(SIGALRM, SIG_IGN);
	signal(SIGUSR1, SIG_IGN);

	/* close the pipe ends this process does not use */
	close(pipe_s2r[0]);
	close(pipe_s2rctl[0]);
	close(pipe_m2r[0]);
	close(pipe_m2s[0]);
	close(pipe_m2s[1]);

	/* initialize the RIB structures */
	if ((ibuf_se = malloc(sizeof(struct imsgbuf))) == NULL ||
	    (ibuf_se_ctl = malloc(sizeof(struct imsgbuf))) == NULL ||
	    (ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL)
		fatal(NULL);
	imsg_init(ibuf_se, pipe_s2r[1]);
	imsg_init(ibuf_se_ctl, pipe_s2rctl[1]);
	imsg_init(ibuf_main, pipe_m2r[1]);

	pt_init();
	path_init(pathhashsize);
	aspath_init(pathhashsize);
	attr_init(attrhashsize);
	nexthop_init(nexthophashsize);
	peer_init(peerhashsize);

	rules_l = calloc(1, sizeof(struct filter_head));
	if (rules_l == NULL)
		fatal(NULL);
	TAILQ_INIT(rules_l);
	rdomains_l = calloc(1, sizeof(struct rdomain_head));
	if (rdomains_l == NULL)
		fatal(NULL);
	SIMPLEQ_INIT(rdomains_l);
	if ((conf = calloc(1, sizeof(struct bgpd_config))) == NULL)
		fatal(NULL);
	log_info("route decision engine ready");

	while (rde_quit == 0) {
		/* grow the pollfd array to fit the three pipes + mrt fds */
		if (pfd_elms < PFD_PIPE_COUNT + rde_mrt_cnt) {
			if ((newp = realloc(pfd, sizeof(struct pollfd) *
			    (PFD_PIPE_COUNT + rde_mrt_cnt))) == NULL) {
				/* panic for now  */
				log_warn("could not resize pfd from %u -> %u"
				    " entries", pfd_elms, PFD_PIPE_COUNT +
				    rde_mrt_cnt);
				fatalx("exiting");
			}
			pfd = newp;
			pfd_elms = PFD_PIPE_COUNT + rde_mrt_cnt;
		}
		timeout = INFTIM;
		bzero(pfd, sizeof(struct pollfd) * pfd_elms);
		pfd[PFD_PIPE_MAIN].fd = ibuf_main->fd;
		pfd[PFD_PIPE_MAIN].events = POLLIN;
		if (ibuf_main->w.queued > 0)
			pfd[PFD_PIPE_MAIN].events |= POLLOUT;

		pfd[PFD_PIPE_SESSION].fd = ibuf_se->fd;
		pfd[PFD_PIPE_SESSION].events = POLLIN;
		if (ibuf_se->w.queued > 0)
			pfd[PFD_PIPE_SESSION].events |= POLLOUT;

		pfd[PFD_PIPE_SESSION_CTL].fd = ibuf_se_ctl->fd;
		pfd[PFD_PIPE_SESSION_CTL].events = POLLIN;
		if (ibuf_se_ctl->w.queued > 0)
			pfd[PFD_PIPE_SESSION_CTL].events |= POLLOUT;
		else if (rib_dump_pending())
			timeout = 0;	/* don't block, a RIB dump wants to run */

		/*
		 * Add one pollfd per MRT context with queued output; contexts
		 * in MRT_STATE_REMOVE with nothing queued are torn down here.
		 */
		i = PFD_PIPE_COUNT;
		for (mctx = LIST_FIRST(&rde_mrts); mctx != 0; mctx = xmctx) {
			xmctx = LIST_NEXT(mctx, entry);
			if (mctx->mrt.wbuf.queued) {
				pfd[i].fd = mctx->mrt.wbuf.fd;
				pfd[i].events = POLLOUT;
				i++;
			} else if (mctx->mrt.state == MRT_STATE_REMOVE) {
				close(mctx->mrt.wbuf.fd);
				LIST_REMOVE(&mctx->ribctx, entry);
				LIST_REMOVE(mctx, entry);
				free(mctx);
				rde_mrt_cnt--;
			}
		}

		if (poll(pfd, i, timeout) == -1) {
			if (errno != EINTR)
				fatal("poll error");
			continue;
		}

		/* flush pending writes, then read incoming imsgs, per pipe */
		if ((pfd[PFD_PIPE_MAIN].revents & POLLOUT) &&
		    ibuf_main->w.queued)
			if (msgbuf_write(&ibuf_main->w) < 0)
				fatal("pipe write error");

		if (pfd[PFD_PIPE_MAIN].revents & POLLIN)
			rde_dispatch_imsg_parent(ibuf_main);

		if ((pfd[PFD_PIPE_SESSION].revents & POLLOUT) &&
		    ibuf_se->w.queued)
			if (msgbuf_write(&ibuf_se->w) < 0)
				fatal("pipe write error");

		if (pfd[PFD_PIPE_SESSION].revents & POLLIN)
			rde_dispatch_imsg_session(ibuf_se);

		if ((pfd[PFD_PIPE_SESSION_CTL].revents & POLLOUT) &&
		    ibuf_se_ctl->w.queued)
			if (msgbuf_write(&ibuf_se_ctl->w) < 0)
				fatal("pipe write error");

		if (pfd[PFD_PIPE_SESSION_CTL].revents & POLLIN)
			rde_dispatch_imsg_session(ibuf_se_ctl);

		/*
		 * Walk pollfds and MRT list in lockstep; both were filled in
		 * the same order above, so the fds are matched positionally.
		 */
		for (j = PFD_PIPE_COUNT, mctx = LIST_FIRST(&rde_mrts);
		    j < i && mctx != 0; j++) {
			if (pfd[j].fd == mctx->mrt.wbuf.fd &&
			    pfd[j].revents & POLLOUT)
				mrt_write(&mctx->mrt);
			mctx = LIST_NEXT(mctx, entry);
		}

		rde_update_queue_runner();
		for (aid = AID_INET6; aid < AID_MAX; aid++)
			rde_update6_queue_runner(aid);
		/* only continue RIB dumps while the ctl pipe is drained */
		if (ibuf_se_ctl->w.queued <= 0)
			rib_dump_runner();
	}

	/* do not clean up on shutdown on production, it takes ages. */
	if (debug)
		rde_shutdown();

	while ((mctx = LIST_FIRST(&rde_mrts)) != NULL) {
		msgbuf_clear(&mctx->mrt.wbuf);
		close(mctx->mrt.wbuf.fd);
		LIST_REMOVE(&mctx->ribctx, entry);
		LIST_REMOVE(mctx, entry);
		free(mctx);
	}

	msgbuf_clear(&ibuf_se->w);
	free(ibuf_se);
	msgbuf_clear(&ibuf_se_ctl->w);
	free(ibuf_se_ctl);
	msgbuf_clear(&ibuf_main->w);
	free(ibuf_main);

	log_info("route decision engine exiting");
	_exit(0);
}
350 
/*
 * Staging areas for network statements: netconf_s is filled from the
 * session engine, netconf_p from the parent process.  session_set /
 * parent_set point at the attribute set currently being appended to by
 * IMSG_FILTER_SET messages, or are NULL when none is open.
 */
struct network_config	 netconf_s, netconf_p;
struct filter_set_head	*session_set, *parent_set;
353 
/*
 * Dispatch imsgs arriving from the session engine on ibuf (used for both
 * the regular and the control pipe): BGP updates, peer session state
 * changes, dynamic network statements and control-socket show requests.
 * Fatal on read errors or when the pipe is closed.
 */
void
rde_dispatch_imsg_session(struct imsgbuf *ibuf)
{
	struct imsg		 imsg;
	struct peer		 p;
	struct peer_config	 pconf;
	struct rde_peer		*peer;
	struct session_up	 sup;
	struct ctl_show_rib_request	req;
	struct filter_set	*s;
	struct nexthop		*nh;
	ssize_t			 n;
	int			 verbose;
	u_int8_t		 aid;

	if ((n = imsg_read(ibuf)) == -1)
		fatal("rde_dispatch_imsg_session: imsg_read error");
	if (n == 0)	/* connection closed */
		fatalx("rde_dispatch_imsg_session: pipe closed");

	/* drain every complete imsg that is currently buffered */
	for (;;) {
		if ((n = imsg_get(ibuf, &imsg)) == -1)
			fatal("rde_dispatch_imsg_session: imsg_read error");
		if (n == 0)
			break;

		switch (imsg.hdr.type) {
		case IMSG_UPDATE:
			rde_update_dispatch(&imsg);
			break;
		case IMSG_SESSION_ADD:
			if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(pconf))
				fatalx("incorrect size of session request");
			memcpy(&pconf, imsg.data, sizeof(pconf));
			peer = peer_add(imsg.hdr.peerid, &pconf);
			if (peer == NULL) {
				log_warnx("session add: "
				    "peer id %d already exists",
				    imsg.hdr.peerid);
				break;
			}
			break;
		case IMSG_SESSION_UP:
			if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(sup))
				fatalx("incorrect size of session request");
			memcpy(&sup, imsg.data, sizeof(sup));
			peer_up(imsg.hdr.peerid, &sup);
			break;
		case IMSG_SESSION_DOWN:
			peer_down(imsg.hdr.peerid);
			break;
		case IMSG_REFRESH:
			/* route refresh: re-dump the RIB for one AID */
			if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&aid, imsg.data, sizeof(aid));
			if (aid >= AID_MAX)
				fatalx("IMSG_REFRESH: bad AID");
			peer_dump(imsg.hdr.peerid, aid);
			break;
		case IMSG_NETWORK_ADD:
			/* stage the network config; attrs follow via
			 * IMSG_FILTER_SET, terminated by IMSG_NETWORK_DONE */
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct network_config)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&netconf_s, imsg.data, sizeof(netconf_s));
			TAILQ_INIT(&netconf_s.attrset);
			session_set = &netconf_s.attrset;
			break;
		case IMSG_NETWORK_DONE:
			if (imsg.hdr.len != IMSG_HEADER_SIZE) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			session_set = NULL;
			/* sanity-check prefixlen against the address family */
			switch (netconf_s.prefix.aid) {
			case AID_INET:
				if (netconf_s.prefixlen > 32)
					goto badnet;
				network_add(&netconf_s, 0);
				break;
			case AID_INET6:
				if (netconf_s.prefixlen > 128)
					goto badnet;
				network_add(&netconf_s, 0);
				break;
			default:
badnet:
				log_warnx("rde_dispatch: bad network");
				break;
			}
			break;
		case IMSG_NETWORK_REMOVE:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct network_config)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&netconf_s, imsg.data, sizeof(netconf_s));
			TAILQ_INIT(&netconf_s.attrset);
			network_delete(&netconf_s, 0);
			break;
		case IMSG_NETWORK_FLUSH:
			/* withdraw all dynamically announced networks */
			if (imsg.hdr.len != IMSG_HEADER_SIZE) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			prefix_network_clean(peerself, time(NULL),
			    F_ANN_DYNAMIC);
			break;
		case IMSG_FILTER_SET:
			/* append one filter set entry to the open attrset */
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct filter_set)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			if (session_set == NULL) {
				log_warnx("rde_dispatch: "
				    "IMSG_FILTER_SET unexpected");
				break;
			}
			if ((s = malloc(sizeof(struct filter_set))) == NULL)
				fatal(NULL);
			memcpy(s, imsg.data, sizeof(struct filter_set));
			TAILQ_INSERT_TAIL(session_set, s, entry);

			if (s->type == ACTION_SET_NEXTHOP) {
				/* take a reference on the nexthop object */
				nh = nexthop_get(&s->action.nexthop);
				nh->refcnt++;
			}
			break;
		case IMSG_CTL_SHOW_NETWORK:
		case IMSG_CTL_SHOW_RIB:
		case IMSG_CTL_SHOW_RIB_AS:
		case IMSG_CTL_SHOW_RIB_COMMUNITY:
		case IMSG_CTL_SHOW_RIB_PREFIX:
			/* start an asynchronous RIB dump for bgpctl */
			if (imsg.hdr.len != IMSG_HEADER_SIZE + sizeof(req)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&req, imsg.data, sizeof(req));
			rde_dump_ctx_new(&req, imsg.hdr.pid, imsg.hdr.type);
			break;
		case IMSG_CTL_SHOW_NEIGHBOR:
			/* fill in the RDE-side prefix counters and echo back */
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct peer)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&p, imsg.data, sizeof(struct peer));
			peer = peer_get(p.conf.id);
			if (peer != NULL) {
				p.stats.prefix_cnt = peer->prefix_cnt;
				p.stats.prefix_rcvd_update =
				    peer->prefix_rcvd_update;
				p.stats.prefix_rcvd_withdraw =
				    peer->prefix_rcvd_withdraw;
				p.stats.prefix_sent_update =
				    peer->prefix_sent_update;
				p.stats.prefix_sent_withdraw =
				    peer->prefix_sent_withdraw;
			}
			imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NEIGHBOR, 0,
			    imsg.hdr.pid, -1, &p, sizeof(struct peer));
			break;
		case IMSG_CTL_END:
			imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, imsg.hdr.pid,
			    -1, NULL, 0);
			break;
		case IMSG_CTL_SHOW_RIB_MEM:
			imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_MEM, 0,
			    imsg.hdr.pid, -1, &rdemem, sizeof(rdemem));
			break;
		case IMSG_CTL_LOG_VERBOSE:
			/* already checked by SE */
			memcpy(&verbose, imsg.data, sizeof(verbose));
			log_verbose(verbose);
			break;
		default:
			break;
		}
		imsg_free(&imsg);
	}
}
540 
/*
 * Dispatch imsgs arriving from the parent (main) process: network
 * statements, the multi-message reconfiguration sequence (RECONF_CONF
 * ... RECONF_DONE), nexthop updates and MRT dump requests.
 * Fatal on read errors or when the pipe is closed.
 */
void
rde_dispatch_imsg_parent(struct imsgbuf *ibuf)
{
	/* static: rd must survive across calls while an rdomain's
	 * import/export sets are still being streamed in */
	static struct rdomain	*rd;
	struct imsg		 imsg;
	struct mrt		 xmrt;
	struct rde_rib		 rn;
	struct rde_peer		*peer;
	struct peer_config	*pconf;
	struct filter_rule	*r;
	struct filter_set	*s;
	struct nexthop		*nh;
	/* NOTE(review): n holds imsg_read()'s ssize_t result — confirm
	 * int is wide enough here, sibling function uses ssize_t */
	int			 n, fd, reconf_in = 0, reconf_out = 0,
				 reconf_rib = 0;
	u_int16_t		 rid;

	if ((n = imsg_read(ibuf)) == -1)
		fatal("rde_dispatch_imsg_parent: imsg_read error");
	if (n == 0)	/* connection closed */
		fatalx("rde_dispatch_imsg_parent: pipe closed");

	/* drain every complete imsg that is currently buffered */
	for (;;) {
		if ((n = imsg_get(ibuf, &imsg)) == -1)
			fatal("rde_dispatch_imsg_parent: imsg_read error");
		if (n == 0)
			break;

		switch (imsg.hdr.type) {
		case IMSG_NETWORK_ADD:
			/* stage the network config; attrs follow via
			 * IMSG_FILTER_SET, terminated by IMSG_NETWORK_DONE */
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct network_config)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&netconf_p, imsg.data, sizeof(netconf_p));
			TAILQ_INIT(&netconf_p.attrset);
			parent_set = &netconf_p.attrset;
			break;
		case IMSG_NETWORK_DONE:
			parent_set = NULL;
			network_add(&netconf_p, 1);
			break;
		case IMSG_NETWORK_REMOVE:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct network_config)) {
				log_warnx("rde_dispatch: wrong imsg len");
				break;
			}
			memcpy(&netconf_p, imsg.data, sizeof(netconf_p));
			TAILQ_INIT(&netconf_p.attrset);
			network_delete(&netconf_p, 1);
			break;
		case IMSG_RECONF_CONF:
			/* start of a reconfiguration: allocate the staging
			 * structures and mark all ribs for deletion; the
			 * following RECONF_* messages refine this state */
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct bgpd_config))
				fatalx("IMSG_RECONF_CONF bad len");
			reloadtime = time(NULL);
			newrules = calloc(1, sizeof(struct filter_head));
			if (newrules == NULL)
				fatal(NULL);
			TAILQ_INIT(newrules);
			newdomains = calloc(1, sizeof(struct rdomain_head));
			if (newdomains == NULL)
				fatal(NULL);
			SIMPLEQ_INIT(newdomains);
			if ((nconf = malloc(sizeof(struct bgpd_config))) ==
			    NULL)
				fatal(NULL);
			memcpy(nconf, imsg.data, sizeof(struct bgpd_config));
			for (rid = 0; rid < rib_size; rid++)
				ribs[rid].state = RECONF_DELETE;
			break;
		case IMSG_RECONF_RIB:
			/* keep, recreate or newly create a rib depending on
			 * whether rtableid/fib flags changed */
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct rde_rib))
				fatalx("IMSG_RECONF_RIB bad len");
			memcpy(&rn, imsg.data, sizeof(rn));
			rid = rib_find(rn.name);
			if (rid == RIB_FAILED)
				rib_new(rn.name, rn.rtableid, rn.flags);
			else if (ribs[rid].rtableid != rn.rtableid ||
			    (ribs[rid].flags & F_RIB_HASNOFIB) !=
			    (rn.flags & F_RIB_HASNOFIB)) {
				/* Big hammer in the F_RIB_NOFIB case but
				 * not often enough used to optimise it more. */
				rib_free(&ribs[rid]);
				rib_new(rn.name, rn.rtableid, rn.flags);
			} else
				ribs[rid].state = RECONF_KEEP;
			break;
		case IMSG_RECONF_PEER:
			/* only the rib binding is taken over here; the rest
			 * of the peer config is handled by the SE */
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct peer_config))
				fatalx("IMSG_RECONF_PEER bad len");
			if ((peer = peer_get(imsg.hdr.peerid)) == NULL)
				break;
			pconf = imsg.data;
			strlcpy(peer->conf.rib, pconf->rib,
			    sizeof(peer->conf.rib));
			break;
		case IMSG_RECONF_FILTER:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct filter_rule))
				fatalx("IMSG_RECONF_FILTER bad len");
			if ((r = malloc(sizeof(struct filter_rule))) == NULL)
				fatal(NULL);
			memcpy(r, imsg.data, sizeof(struct filter_rule));
			TAILQ_INIT(&r->set);
			r->peer.ribid = rib_find(r->rib);
			/* subsequent IMSG_FILTER_SETs attach to this rule */
			parent_set = &r->set;
			TAILQ_INSERT_TAIL(newrules, r, entry);
			break;
		case IMSG_RECONF_RDOMAIN:
			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
			    sizeof(struct rdomain))
				fatalx("IMSG_RECONF_RDOMAIN bad len");
			if ((rd = malloc(sizeof(struct rdomain))) == NULL)
				fatal(NULL);
			memcpy(rd, imsg.data, sizeof(struct rdomain));
			TAILQ_INIT(&rd->import);
			TAILQ_INIT(&rd->export);
			SIMPLEQ_INSERT_TAIL(newdomains, rd, entry);
			break;
		case IMSG_RECONF_RDOMAIN_EXPORT:
			if (rd == NULL) {
				log_warnx("rde_dispatch_imsg_parent: "
				    "IMSG_RECONF_RDOMAIN_EXPORT unexpected");
				break;
			}
			parent_set = &rd->export;
			break;
		case IMSG_RECONF_RDOMAIN_IMPORT:
			if (rd == NULL) {
				log_warnx("rde_dispatch_imsg_parent: "
				    "IMSG_RECONF_RDOMAIN_IMPORT unexpected");
				break;
			}
			parent_set = &rd->import;
			break;
		case IMSG_RECONF_RDOMAIN_DONE:
			parent_set = NULL;
			break;
		case IMSG_RECONF_DONE:
			/* end of reconfiguration: swap in the staged config
			 * and perform the softreconfig dance */
			if (nconf == NULL)
				fatalx("got IMSG_RECONF_DONE but no config");
			/* route-collector mode cannot be toggled at runtime */
			if ((nconf->flags & BGPD_FLAG_NO_EVALUATE)
			    != (conf->flags & BGPD_FLAG_NO_EVALUATE)) {
				log_warnx("change to/from route-collector "
				    "mode ignored");
				if (conf->flags & BGPD_FLAG_NO_EVALUATE)
					nconf->flags |= BGPD_FLAG_NO_EVALUATE;
				else
					nconf->flags &= ~BGPD_FLAG_NO_EVALUATE;
			}
			memcpy(conf, nconf, sizeof(struct bgpd_config));
			/* these belong to the parent process, not the RDE */
			conf->listen_addrs = NULL;
			conf->csock = NULL;
			conf->rcsock = NULL;
			free(nconf);
			nconf = NULL;
			parent_set = NULL;
			/* sync peerself with conf */
			peerself->remote_bgpid = ntohl(conf->bgpid);
			peerself->conf.local_as = conf->as;
			peerself->conf.remote_as = conf->as;
			peerself->short_as = conf->short_as;

			/* apply new set of rdomain, sync will be done later */
			while ((rd = SIMPLEQ_FIRST(rdomains_l)) != NULL) {
				SIMPLEQ_REMOVE_HEAD(rdomains_l, entry);
				filterset_free(&rd->import);
				filterset_free(&rd->export);
				free(rd);
			}
			free(rdomains_l);
			rdomains_l = newdomains;

			/* check if filter changed */
			LIST_FOREACH(peer, &peerlist, peer_l) {
				if (peer->conf.id == 0)
					continue;	/* skip peerself */
				peer->reconf_out = 0;
				peer->reconf_in = 0;
				peer->reconf_rib = 0;
				if (peer->conf.softreconfig_in &&
				    !rde_filter_equal(rules_l, newrules, peer,
				    DIR_IN)) {
					peer->reconf_in = 1;
					reconf_in = 1;
				}
				if (peer->ribid != rib_find(peer->conf.rib)) {
					/* peer got moved to a different rib:
					 * unload from the old, reload later */
					rib_dump(&ribs[peer->ribid],
					    rde_softreconfig_unload_peer, peer,
					    AID_UNSPEC);
					peer->ribid = rib_find(peer->conf.rib);
					peer->reconf_rib = 1;
					reconf_rib = 1;
					continue;
				}
				if (peer->conf.softreconfig_out &&
				    !rde_filter_equal(rules_l, newrules, peer,
				    DIR_OUT)) {
					peer->reconf_out = 1;
					reconf_out = 1;
				}
			}
			/* bring ribs in sync before softreconfig dance */
			for (rid = 0; rid < rib_size; rid++) {
				if (ribs[rid].state == RECONF_DELETE)
					rib_free(&ribs[rid]);
				else if (ribs[rid].state == RECONF_REINIT)
					rib_dump(&ribs[0],
					    rde_softreconfig_load, &ribs[rid],
					    AID_UNSPEC);
			}
			/* sync local-RIBs first */
			if (reconf_in)
				rib_dump(&ribs[0], rde_softreconfig_in, NULL,
				    AID_UNSPEC);
			/* then sync peers */
			if (reconf_out) {
				int i;
				for (i = 1; i < rib_size; i++) {
					if (ribs[i].state == RECONF_REINIT)
						/* already synced by _load */
						continue;
					rib_dump(&ribs[i], rde_softreconfig_out,
					    NULL, AID_UNSPEC);
				}
			}
			if (reconf_rib) {
				LIST_FOREACH(peer, &peerlist, peer_l) {
					rib_dump(&ribs[peer->ribid],
						rde_softreconfig_load_peer,
						peer, AID_UNSPEC);
				}
			}

			/* old rules are only freed now, rde_filter_equal()
			 * above still needed them for comparison */
			while ((r = TAILQ_FIRST(rules_l)) != NULL) {
				TAILQ_REMOVE(rules_l, r, entry);
				filterset_free(&r->set);
				free(r);
			}
			free(rules_l);
			rules_l = newrules;

			log_info("RDE reconfigured");
			break;
		case IMSG_NEXTHOP_UPDATE:
			nexthop_update(imsg.data);
			break;
		case IMSG_FILTER_SET:
			/* append one filter set entry to the open attrset */
			if (imsg.hdr.len > IMSG_HEADER_SIZE +
			    sizeof(struct filter_set))
				fatalx("IMSG_RECONF_CONF bad len");
			if (parent_set == NULL) {
				log_warnx("rde_dispatch_imsg_parent: "
				    "IMSG_FILTER_SET unexpected");
				break;
			}
			if ((s = malloc(sizeof(struct filter_set))) == NULL)
				fatal(NULL);
			memcpy(s, imsg.data, sizeof(struct filter_set));
			TAILQ_INSERT_TAIL(parent_set, s, entry);

			if (s->type == ACTION_SET_NEXTHOP) {
				/* take a reference on the nexthop object */
				nh = nexthop_get(&s->action.nexthop);
				nh->refcnt++;
			}
			break;
		case IMSG_MRT_OPEN:
		case IMSG_MRT_REOPEN:
			/* the dump target fd is passed along with the imsg */
			if (imsg.hdr.len > IMSG_HEADER_SIZE +
			    sizeof(struct mrt)) {
				log_warnx("wrong imsg len");
				break;
			}
			memcpy(&xmrt, imsg.data, sizeof(xmrt));
			if ((fd = imsg.fd) == -1)
				log_warnx("expected to receive fd for mrt dump "
				    "but didn't receive any");
			else if (xmrt.type == MRT_TABLE_DUMP ||
			    xmrt.type == MRT_TABLE_DUMP_MP) {
				rde_dump_mrt_new(&xmrt, imsg.hdr.pid, fd);
			} else
				close(fd);	/* not a dump type handled here */
			break;
		case IMSG_MRT_CLOSE:
			/* ignore end message because a dump is atomic */
			break;
		default:
			break;
		}
		imsg_free(&imsg);
	}
}
837 
838 /* handle routing updates from the session engine. */
839 int
840 rde_update_dispatch(struct imsg *imsg)
841 {
842 	struct bgpd_addr	 prefix;
843 	struct mpattr		 mpa;
844 	struct rde_peer		*peer;
845 	struct rde_aspath	*asp = NULL;
846 	u_char			*p, *mpp = NULL;
847 	int			 error = -1, pos = 0;
848 	u_int16_t		 afi, len, mplen;
849 	u_int16_t		 withdrawn_len;
850 	u_int16_t		 attrpath_len;
851 	u_int16_t		 nlri_len;
852 	u_int8_t		 aid, prefixlen, safi, subtype;
853 	u_int32_t		 fas;
854 
855 	peer = peer_get(imsg->hdr.peerid);
856 	if (peer == NULL)	/* unknown peer, cannot happen */
857 		return (-1);
858 	if (peer->state != PEER_UP)
859 		return (-1);	/* peer is not yet up, cannot happen */
860 
861 	p = imsg->data;
862 
863 	if (imsg->hdr.len < IMSG_HEADER_SIZE + 2) {
864 		rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST, NULL, 0);
865 		return (-1);
866 	}
867 
868 	memcpy(&len, p, 2);
869 	withdrawn_len = ntohs(len);
870 	p += 2;
871 	if (imsg->hdr.len < IMSG_HEADER_SIZE + 2 + withdrawn_len + 2) {
872 		rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST, NULL, 0);
873 		return (-1);
874 	}
875 
876 	p += withdrawn_len;
877 	memcpy(&len, p, 2);
878 	attrpath_len = len = ntohs(len);
879 	p += 2;
880 	if (imsg->hdr.len <
881 	    IMSG_HEADER_SIZE + 2 + withdrawn_len + 2 + attrpath_len) {
882 		rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST, NULL, 0);
883 		return (-1);
884 	}
885 
886 	nlri_len =
887 	    imsg->hdr.len - IMSG_HEADER_SIZE - 4 - withdrawn_len - attrpath_len;
888 	bzero(&mpa, sizeof(mpa));
889 
890 	if (attrpath_len != 0) { /* 0 = no NLRI information in this message */
891 		/* parse path attributes */
892 		asp = path_get();
893 		while (len > 0) {
894 			if ((pos = rde_attr_parse(p, len, peer, asp,
895 			    &mpa)) < 0)
896 				goto done;
897 			p += pos;
898 			len -= pos;
899 		}
900 
901 		/* check for missing but necessary attributes */
902 		if ((subtype = rde_attr_missing(asp, peer->conf.ebgp,
903 		    nlri_len))) {
904 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_MISSNG_WK_ATTR,
905 			    &subtype, sizeof(u_int8_t));
906 			goto done;
907 		}
908 
909 		rde_as4byte_fixup(peer, asp);
910 
911 		/* enforce remote AS if requested */
912 		if (asp->flags & F_ATTR_ASPATH &&
913 		    peer->conf.enforce_as == ENFORCE_AS_ON) {
914 			fas = aspath_neighbor(asp->aspath);
915 			if (peer->conf.remote_as != fas) {
916 			    log_peer_warnx(&peer->conf, "bad path, "
917 				"starting with %s, "
918 				"enforce neighbor-as enabled", log_as(fas));
919 			    rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH,
920 				    NULL, 0);
921 			    goto done;
922 			}
923 		}
924 
925 		rde_reflector(peer, asp);
926 	}
927 
928 	p = imsg->data;
929 	len = withdrawn_len;
930 	p += 2;
931 	/* withdraw prefix */
932 	while (len > 0) {
933 		if ((pos = rde_update_get_prefix(p, len, &prefix,
934 		    &prefixlen)) == -1) {
935 			/*
936 			 * the RFC does not mention what we should do in
937 			 * this case. Let's do the same as in the NLRI case.
938 			 */
939 			log_peer_warnx(&peer->conf, "bad withdraw prefix");
940 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK,
941 			    NULL, 0);
942 			goto done;
943 		}
944 		if (prefixlen > 32) {
945 			log_peer_warnx(&peer->conf, "bad withdraw prefix");
946 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK,
947 			    NULL, 0);
948 			goto done;
949 		}
950 
951 		p += pos;
952 		len -= pos;
953 
954 		if (peer->capa.mp[AID_INET] == 0) {
955 			log_peer_warnx(&peer->conf,
956 			    "bad withdraw, %s disabled", aid2str(AID_INET));
957 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
958 			    NULL, 0);
959 			goto done;
960 		}
961 
962 		rde_update_withdraw(peer, &prefix, prefixlen);
963 	}
964 
965 	if (attrpath_len == 0) {
966 		/* 0 = no NLRI information in this message */
967 		if (nlri_len != 0) {
968 			/* crap at end of update which should not be there */
969 			rde_update_err(peer, ERR_UPDATE,
970 			    ERR_UPD_ATTRLIST, NULL, 0);
971 			return (-1);
972 		}
973 		return (0);
974 	}
975 
976 	/* withdraw MP_UNREACH_NLRI if available */
977 	if (mpa.unreach_len != 0) {
978 		mpp = mpa.unreach;
979 		mplen = mpa.unreach_len;
980 		memcpy(&afi, mpp, 2);
981 		mpp += 2;
982 		mplen -= 2;
983 		afi = ntohs(afi);
984 		safi = *mpp++;
985 		mplen--;
986 
987 		if (afi2aid(afi, safi, &aid) == -1) {
988 			log_peer_warnx(&peer->conf,
989 			    "bad AFI/SAFI pair in withdraw");
990 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
991 			    NULL, 0);
992 			goto done;
993 		}
994 
995 		if (peer->capa.mp[aid] == 0) {
996 			log_peer_warnx(&peer->conf,
997 			    "bad withdraw, %s disabled", aid2str(aid));
998 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
999 			    NULL, 0);
1000 			goto done;
1001 		}
1002 
1003 		switch (aid) {
1004 		case AID_INET6:
1005 			while (mplen > 0) {
1006 				if ((pos = rde_update_get_prefix6(mpp, mplen,
1007 				    &prefix, &prefixlen)) == -1) {
1008 					log_peer_warnx(&peer->conf,
1009 					    "bad IPv6 withdraw prefix");
1010 					rde_update_err(peer, ERR_UPDATE,
1011 					    ERR_UPD_OPTATTR,
1012 					    mpa.unreach, mpa.unreach_len);
1013 					goto done;
1014 				}
1015 				if (prefixlen > 128) {
1016 					log_peer_warnx(&peer->conf,
1017 					    "bad IPv6 withdraw prefix");
1018 					rde_update_err(peer, ERR_UPDATE,
1019 					    ERR_UPD_OPTATTR,
1020 					    mpa.unreach, mpa.unreach_len);
1021 					goto done;
1022 				}
1023 
1024 				mpp += pos;
1025 				mplen -= pos;
1026 
1027 				rde_update_withdraw(peer, &prefix, prefixlen);
1028 			}
1029 			break;
1030 		case AID_VPN_IPv4:
1031 			while (mplen > 0) {
1032 				if ((pos = rde_update_get_vpn4(mpp, mplen,
1033 				    &prefix, &prefixlen)) == -1) {
1034 					log_peer_warnx(&peer->conf,
1035 					    "bad VPNv4 withdraw prefix");
1036 					rde_update_err(peer, ERR_UPDATE,
1037 					    ERR_UPD_OPTATTR,
1038 					    mpa.unreach, mpa.unreach_len);
1039 					goto done;
1040 				}
1041 				if (prefixlen > 32) {
1042 					log_peer_warnx(&peer->conf,
1043 					    "bad VPNv4 withdraw prefix");
1044 					rde_update_err(peer, ERR_UPDATE,
1045 					    ERR_UPD_OPTATTR,
1046 					    mpa.unreach, mpa.unreach_len);
1047 					goto done;
1048 				}
1049 
1050 				mpp += pos;
1051 				mplen -= pos;
1052 
1053 				rde_update_withdraw(peer, &prefix, prefixlen);
1054 			}
1055 			break;
1056 		default:
1057 			/* silently ignore unsupported multiprotocol AF */
1058 			break;
1059 		}
1060 
1061 		if ((asp->flags & ~F_ATTR_MP_UNREACH) == 0) {
1062 			error = 0;
1063 			goto done;
1064 		}
1065 	}
1066 
1067 	/* shift to NLRI information */
1068 	p += 2 + attrpath_len;
1069 
1070 	/* aspath needs to be loop free nota bene this is not a hard error */
1071 	if (peer->conf.ebgp && !aspath_loopfree(asp->aspath, conf->as))
1072 		asp->flags |= F_ATTR_LOOP;
1073 
1074 	/* parse nlri prefix */
1075 	while (nlri_len > 0) {
1076 		if ((pos = rde_update_get_prefix(p, nlri_len, &prefix,
1077 		    &prefixlen)) == -1) {
1078 			log_peer_warnx(&peer->conf, "bad nlri prefix");
1079 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK,
1080 			    NULL, 0);
1081 			goto done;
1082 		}
1083 		if (prefixlen > 32) {
1084 			log_peer_warnx(&peer->conf, "bad nlri prefix");
1085 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK,
1086 			    NULL, 0);
1087 			goto done;
1088 		}
1089 
1090 		p += pos;
1091 		nlri_len -= pos;
1092 
1093 		if (peer->capa.mp[AID_INET] == 0) {
1094 			log_peer_warnx(&peer->conf,
1095 			    "bad update, %s disabled", aid2str(AID_INET));
1096 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
1097 			    NULL, 0);
1098 			goto done;
1099 		}
1100 
1101 		rde_update_update(peer, asp, &prefix, prefixlen);
1102 
1103 		/* max prefix checker */
1104 		if (peer->conf.max_prefix &&
1105 		    peer->prefix_cnt >= peer->conf.max_prefix) {
1106 			log_peer_warnx(&peer->conf, "prefix limit reached");
1107 			rde_update_err(peer, ERR_CEASE, ERR_CEASE_MAX_PREFIX,
1108 			    NULL, 0);
1109 			goto done;
1110 		}
1111 
1112 	}
1113 
1114 	/* add MP_REACH_NLRI if available */
1115 	if (mpa.reach_len != 0) {
1116 		mpp = mpa.reach;
1117 		mplen = mpa.reach_len;
1118 		memcpy(&afi, mpp, 2);
1119 		mpp += 2;
1120 		mplen -= 2;
1121 		afi = ntohs(afi);
1122 		safi = *mpp++;
1123 		mplen--;
1124 
1125 		if (afi2aid(afi, safi, &aid) == -1) {
1126 			log_peer_warnx(&peer->conf,
1127 			    "bad AFI/SAFI pair in update");
1128 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
1129 			    NULL, 0);
1130 			goto done;
1131 		}
1132 
1133 		if (peer->capa.mp[aid] == 0) {
1134 			log_peer_warnx(&peer->conf,
1135 			    "bad update, %s disabled", aid2str(aid));
1136 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
1137 			    NULL, 0);
1138 			goto done;
1139 		}
1140 
1141 		/*
1142 		 * this works because asp is not linked.
1143 		 * But first unlock the previously locked nexthop.
1144 		 */
1145 		if (asp->nexthop) {
1146 			asp->nexthop->refcnt--;
1147 			(void)nexthop_delete(asp->nexthop);
1148 			asp->nexthop = NULL;
1149 		}
1150 		if ((pos = rde_get_mp_nexthop(mpp, mplen, aid, asp)) == -1) {
1151 			log_peer_warnx(&peer->conf, "bad nlri prefix");
1152 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
1153 			    mpa.reach, mpa.reach_len);
1154 			goto done;
1155 		}
1156 		mpp += pos;
1157 		mplen -= pos;
1158 
1159 		switch (aid) {
1160 		case AID_INET6:
1161 			while (mplen > 0) {
1162 				if ((pos = rde_update_get_prefix6(mpp, mplen,
1163 				    &prefix, &prefixlen)) == -1) {
1164 					log_peer_warnx(&peer->conf,
1165 					    "bad IPv6 nlri prefix");
1166 					rde_update_err(peer, ERR_UPDATE,
1167 					    ERR_UPD_OPTATTR,
1168 					    mpa.reach, mpa.reach_len);
1169 					goto done;
1170 				}
1171 				if (prefixlen > 128) {
1172 					rde_update_err(peer, ERR_UPDATE,
1173 					    ERR_UPD_OPTATTR,
1174 					    mpa.reach, mpa.reach_len);
1175 					goto done;
1176 				}
1177 
1178 				mpp += pos;
1179 				mplen -= pos;
1180 
1181 				rde_update_update(peer, asp, &prefix,
1182 				    prefixlen);
1183 
1184 				/* max prefix checker */
1185 				if (peer->conf.max_prefix &&
1186 				    peer->prefix_cnt >= peer->conf.max_prefix) {
1187 					log_peer_warnx(&peer->conf,
1188 					    "prefix limit reached");
1189 					rde_update_err(peer, ERR_CEASE,
1190 					    ERR_CEASE_MAX_PREFIX, NULL, 0);
1191 					goto done;
1192 				}
1193 
1194 			}
1195 			break;
1196 		case AID_VPN_IPv4:
1197 			while (mplen > 0) {
1198 				if ((pos = rde_update_get_vpn4(mpp, mplen,
1199 				    &prefix, &prefixlen)) == -1) {
1200 					log_peer_warnx(&peer->conf,
1201 					    "bad VPNv4 nlri prefix");
1202 					rde_update_err(peer, ERR_UPDATE,
1203 					    ERR_UPD_OPTATTR,
1204 					    mpa.reach, mpa.reach_len);
1205 					goto done;
1206 				}
1207 				if (prefixlen > 32) {
1208 					rde_update_err(peer, ERR_UPDATE,
1209 					    ERR_UPD_OPTATTR,
1210 					    mpa.reach, mpa.reach_len);
1211 					goto done;
1212 				}
1213 
1214 				mpp += pos;
1215 				mplen -= pos;
1216 
1217 				rde_update_update(peer, asp, &prefix,
1218 				    prefixlen);
1219 
1220 				/* max prefix checker */
1221 				if (peer->conf.max_prefix &&
1222 				    peer->prefix_cnt >= peer->conf.max_prefix) {
1223 					log_peer_warnx(&peer->conf,
1224 					    "prefix limit reached");
1225 					rde_update_err(peer, ERR_CEASE,
1226 					    ERR_CEASE_MAX_PREFIX, NULL, 0);
1227 					goto done;
1228 				}
1229 
1230 			}
1231 			break;
1232 		default:
1233 			/* silently ignore unsupported multiprotocol AF */
1234 			break;
1235 		}
1236 	}
1237 
1238 done:
1239 	if (attrpath_len != 0) {
1240 		/* unlock the previously locked entry */
1241 		if (asp->nexthop) {
1242 			asp->nexthop->refcnt--;
1243 			(void)nexthop_delete(asp->nexthop);
1244 		}
1245 		/* free allocated attribute memory that is no longer used */
1246 		path_put(asp);
1247 	}
1248 
1249 	return (error);
1250 }
1251 
1252 extern u_int16_t rib_size;
1253 
/*
 * Install one received prefix: add it to the Adj-RIB-In (when soft
 * reconfiguration is enabled) and run it through the input filter for
 * every other RIB, installing or removing the path per the filter verdict.
 * Keeps peer->prefix_cnt in sync: it is incremented when at least one
 * path_update() reported a new route and decremented when the prefix was
 * only removed by filters.
 */
void
rde_update_update(struct rde_peer *peer, struct rde_aspath *asp,
    struct bgpd_addr *prefix, u_int8_t prefixlen)
{
	struct rde_aspath	*fasp;	/* possibly filter-modified aspath */
	enum filter_actions	 action;
	int			 r = 0, f = 0;	/* r: routes added, f: filtered out */
	u_int16_t		 i;

	peer->prefix_rcvd_update++;
	/* add original path to the Adj-RIB-In */
	if (peer->conf.softreconfig_in)
		r += path_update(&ribs[0], peer, asp, prefix, prefixlen);

	/* rib 0 is the Adj-RIB-In, so the filtered RIBs start at 1 */
	for (i = 1; i < rib_size; i++) {
		/* input filter */
		action = rde_filter(i, &fasp, rules_l, peer, asp, prefix,
		    prefixlen, peer, DIR_IN);

		/* a NULL fasp means the filter did not modify the path */
		if (fasp == NULL)
			fasp = asp;

		if (action == ACTION_ALLOW) {
			/*
			 * NOTE(review): assumes fasp->nexthop is non-NULL
			 * whenever a path reaches this point -- confirm the
			 * NEXTHOP/MP_REACH attribute is always set by the
			 * callers in rde_update_dispatch().
			 */
			rde_update_log("update", i, peer,
			    &fasp->nexthop->exit_nexthop, prefix, prefixlen);
			r += path_update(&ribs[i], peer, fasp, prefix,
			    prefixlen);
		} else if (prefix_remove(&ribs[i], peer, prefix, prefixlen,
		    0)) {
			/* filter denied a prefix that was previously in the RIB */
			rde_update_log("filtered withdraw", i, peer,
			    NULL, prefix, prefixlen);
			f++;
		}

		/* free modified aspath */
		if (fasp != asp)
			path_put(fasp);
	}

	if (r)
		peer->prefix_cnt++;
	else if (f)
		peer->prefix_cnt--;
}
1298 
1299 void
1300 rde_update_withdraw(struct rde_peer *peer, struct bgpd_addr *prefix,
1301     u_int8_t prefixlen)
1302 {
1303 	int r = 0;
1304 	u_int16_t i;
1305 
1306 	peer->prefix_rcvd_withdraw++;
1307 
1308 	for (i = rib_size - 1; ; i--) {
1309 		if (prefix_remove(&ribs[i], peer, prefix, prefixlen, 0)) {
1310 			rde_update_log("withdraw", i, peer, NULL, prefix,
1311 			    prefixlen);
1312 			r++;
1313 		}
1314 		if (i == 0)
1315 			break;
1316 	}
1317 
1318 	if (r)
1319 		peer->prefix_cnt--;
1320 }
1321 
1322 /*
1323  * BGP UPDATE parser functions
1324  */
1325 
/*
 * Attribute parser specific macros.
 *
 * UPD_READ copies n bytes from the input pointer p into t and advances
 * both the input pointer p and the running parsed-byte counter plen.
 * CHECK_FLAGS verifies that the attribute flag byte s matches the
 * expected flags t, tolerating ATTR_EXTLEN plus any extra bits in m.
 */
#define UPD_READ(t, p, plen, n) \
	do { \
		memcpy(t, p, n); \
		p += n; \
		plen += n; \
	} while (0)

#define CHECK_FLAGS(s, t, m)	\
	(((s) & ~(ATTR_EXTLEN | (m))) == (t))
1336 
/*
 * Parse a single path attribute starting at p (len bytes available).
 * Fills the rde_aspath under construction (a) and records the location
 * of MP_REACH_NLRI/MP_UNREACH_NLRI payloads in mpa for later processing
 * by the caller.  Returns the number of bytes consumed (attribute header
 * plus payload) or -1 after sending a NOTIFICATION via rde_update_err().
 */
int
rde_attr_parse(u_char *p, u_int16_t len, struct rde_peer *peer,
    struct rde_aspath *a, struct mpattr *mpa)
{
	struct bgpd_addr nexthop;
	u_char		*op = p, *npath;	/* op: attr start, echoed in errors */
	u_int32_t	 tmp32;
	int		 err;
	u_int16_t	 attr_len, nlen;
	u_int16_t	 plen = 0;	/* bytes consumed so far */
	u_int8_t	 flags;
	u_int8_t	 type;
	u_int8_t	 tmp8;

	/* minimal attribute header: flags, type and 1-byte length */
	if (len < 3) {
bad_len:
		rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLEN, op, len);
		return (-1);
	}

	UPD_READ(&flags, p, plen, 1);
	UPD_READ(&type, p, plen, 1);

	/* extended length flag selects a 2-byte length field */
	if (flags & ATTR_EXTLEN) {
		if (len - plen < 2)
			goto bad_len;
		UPD_READ(&attr_len, p, plen, 2);
		attr_len = ntohs(attr_len);
	} else {
		UPD_READ(&tmp8, p, plen, 1);
		attr_len = tmp8;
	}

	if (len - plen < attr_len)
		goto bad_len;

	/* adjust len to the actual attribute size including header */
	len = plen + attr_len;

	switch (type) {
	case ATTR_UNDEF:
		/* ignore and drop path attributes with a type code of 0 */
		plen += attr_len;
		break;
	case ATTR_ORIGIN:
		if (attr_len != 1)
			goto bad_len;

		if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0)) {
bad_flags:
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRFLAGS,
			    op, len);
			return (-1);
		}

		UPD_READ(&a->origin, p, plen, 1);
		if (a->origin > ORIGIN_INCOMPLETE) {
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_ORIGIN,
			    op, len);
			return (-1);
		}
		/* duplicate attributes are a fatal attribute-list error */
		if (a->flags & F_ATTR_ORIGIN)
			goto bad_list;
		a->flags |= F_ATTR_ORIGIN;
		break;
	case ATTR_ASPATH:
		if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0))
			goto bad_flags;
		err = aspath_verify(p, attr_len, rde_as4byte(peer));
		if (err == AS_ERR_SOFT) {
			/*
			 * soft errors like unexpected segment types are
			 * not considered fatal and the path is just
			 * marked invalid.
			 */
			a->flags |= F_ATTR_PARSE_ERR;
			log_peer_warnx(&peer->conf, "bad ASPATH, "
			    "path invalidated and prefix withdrawn");
		} else if (err != 0) {
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH,
			    NULL, 0);
			return (-1);
		}
		if (a->flags & F_ATTR_ASPATH)
			goto bad_list;
		/*
		 * 2-byte sessions get their path inflated to the internal
		 * 4-byte representation; npath then owns a fresh allocation.
		 */
		if (rde_as4byte(peer)) {
			npath = p;
			nlen = attr_len;
		} else
			npath = aspath_inflate(p, attr_len, &nlen);
		a->flags |= F_ATTR_ASPATH;
		a->aspath = aspath_get(npath, nlen);
		if (npath != p)
			free(npath);
		plen += attr_len;
		break;
	case ATTR_NEXTHOP:
		if (attr_len != 4)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0))
			goto bad_flags;
		if (a->flags & F_ATTR_NEXTHOP)
			goto bad_list;
		a->flags |= F_ATTR_NEXTHOP;

		bzero(&nexthop, sizeof(nexthop));
		nexthop.aid = AID_INET;
		UPD_READ(&nexthop.v4.s_addr, p, plen, 4);
		/*
		 * Check if the nexthop is a valid IP address. We consider
		 * multicast and experimental addresses as invalid.
		 */
		tmp32 = ntohl(nexthop.v4.s_addr);
		if (IN_MULTICAST(tmp32) || IN_BADCLASS(tmp32)) {
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK,
			    op, len);
			return (-1);
		}
		a->nexthop = nexthop_get(&nexthop);
		/*
		 * lock the nexthop because it is not yet linked else
		 * withdraws may remove this nexthop which in turn would
		 * cause a use after free error.
		 */
		a->nexthop->refcnt++;
		break;
	case ATTR_MED:
		if (attr_len != 4)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
			goto bad_flags;
		if (a->flags & F_ATTR_MED)
			goto bad_list;
		a->flags |= F_ATTR_MED;

		UPD_READ(&tmp32, p, plen, 4);
		a->med = ntohl(tmp32);
		break;
	case ATTR_LOCALPREF:
		if (attr_len != 4)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0))
			goto bad_flags;
		if (peer->conf.ebgp) {
			/* ignore local-pref attr on non ibgp peers */
			plen += 4;
			break;
		}
		if (a->flags & F_ATTR_LOCALPREF)
			goto bad_list;
		a->flags |= F_ATTR_LOCALPREF;

		UPD_READ(&tmp32, p, plen, 4);
		a->lpref = ntohl(tmp32);
		break;
	case ATTR_ATOMIC_AGGREGATE:
		if (attr_len != 0)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0))
			goto bad_flags;
		goto optattr;
	case ATTR_AGGREGATOR:
		/* 6 bytes with 2-byte ASNs, 8 bytes with 4-byte ASNs */
		if ((!rde_as4byte(peer) && attr_len != 6) ||
		    (rde_as4byte(peer) && attr_len != 8)) {
			/*
			 * ignore attribute in case of error as per
			 * draft-ietf-idr-optional-transitive-00.txt
			 * but only if partial bit is set
			 */
			if ((flags & ATTR_PARTIAL) == 0)
				goto bad_len;
			log_peer_warnx(&peer->conf, "bad AGGREGATOR, "
			    "partial attribute ignored");
			plen += attr_len;
			break;
		}
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
		    ATTR_PARTIAL))
			goto bad_flags;
		if (!rde_as4byte(peer)) {
			/* need to inflate aggregator AS to 4-byte */
			u_char	t[8];
			t[0] = t[1] = 0;
			UPD_READ(&t[2], p, plen, 2);
			UPD_READ(&t[4], p, plen, 4);
			if (attr_optadd(a, flags, type, t,
			    sizeof(t)) == -1)
				goto bad_list;
			break;
		}
		/* 4-byte ready server take the default route */
		goto optattr;
	case ATTR_COMMUNITIES:
		if (attr_len % 4 != 0) {
			/*
			 * mark update as bad and withdraw all routes as per
			 * draft-ietf-idr-optional-transitive-00.txt
			 * but only if partial bit is set
			 */
			if ((flags & ATTR_PARTIAL) == 0)
				goto bad_len;
			a->flags |= F_ATTR_PARSE_ERR;
			log_peer_warnx(&peer->conf, "bad COMMUNITIES, "
			    "path invalidated and prefix withdrawn");
		}
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
		    ATTR_PARTIAL))
			goto bad_flags;
		goto optattr;
	case ATTR_EXT_COMMUNITIES:
		if (attr_len % 8 != 0) {
			/*
			 * mark update as bad and withdraw all routes as per
			 * draft-ietf-idr-optional-transitive-00.txt
			 * but only if partial bit is set
			 */
			if ((flags & ATTR_PARTIAL) == 0)
				goto bad_len;
			a->flags |= F_ATTR_PARSE_ERR;
			log_peer_warnx(&peer->conf, "bad EXT_COMMUNITIES, "
			    "path invalidated and prefix withdrawn");
		}
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
		    ATTR_PARTIAL))
			goto bad_flags;
		goto optattr;
	case ATTR_ORIGINATOR_ID:
		if (attr_len != 4)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
			goto bad_flags;
		goto optattr;
	case ATTR_CLUSTER_LIST:
		if (attr_len % 4 != 0)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
			goto bad_flags;
		goto optattr;
	case ATTR_MP_REACH_NLRI:
		/* at least AFI(2), SAFI(1) and nexthop length(1) */
		if (attr_len < 4)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
			goto bad_flags;
		/* the validity is checked in rde_update_dispatch() */
		if (a->flags & F_ATTR_MP_REACH)
			goto bad_list;
		a->flags |= F_ATTR_MP_REACH;

		/* mpa keeps a pointer into the message buffer, not a copy */
		mpa->reach = p;
		mpa->reach_len = attr_len;
		plen += attr_len;
		break;
	case ATTR_MP_UNREACH_NLRI:
		/* at least AFI(2) and SAFI(1) */
		if (attr_len < 3)
			goto bad_len;
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
			goto bad_flags;
		/* the validity is checked in rde_update_dispatch() */
		if (a->flags & F_ATTR_MP_UNREACH)
			goto bad_list;
		a->flags |= F_ATTR_MP_UNREACH;

		mpa->unreach = p;
		mpa->unreach_len = attr_len;
		plen += attr_len;
		break;
	case ATTR_AS4_AGGREGATOR:
		if (attr_len != 8) {
			/* see ATTR_AGGREGATOR ... */
			if ((flags & ATTR_PARTIAL) == 0)
				goto bad_len;
			log_peer_warnx(&peer->conf, "bad AS4_AGGREGATOR, "
			    "partial attribute ignored");
			plen += attr_len;
			break;
		}
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
		    ATTR_PARTIAL))
			goto bad_flags;
		/* remember for rde_as4byte_fixup() */
		a->flags |= F_ATTR_AS4BYTE_NEW;
		goto optattr;
	case ATTR_AS4_PATH:
		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
		    ATTR_PARTIAL))
			goto bad_flags;
		if ((err = aspath_verify(p, attr_len, 1)) != 0) {
			/*
			 * XXX RFC does not specify how to handle errors.
			 * XXX Instead of dropping the session because of a
			 * XXX bad path just mark the full update as having
			 * XXX a parse error which makes the update no longer
			 * XXX eligible and will not be considered for routing
			 * XXX or redistribution.
			 * XXX We follow draft-ietf-idr-optional-transitive
			 * XXX by looking at the partial bit.
			 * XXX Consider soft errors similar to a partial attr.
			 */
			if (flags & ATTR_PARTIAL || err == AS_ERR_SOFT) {
				a->flags |= F_ATTR_PARSE_ERR;
				log_peer_warnx(&peer->conf, "bad AS4_PATH, "
				    "path invalidated and prefix withdrawn");
				goto optattr;
			} else {
				rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH,
				    NULL, 0);
				return (-1);
			}
		}
		a->flags |= F_ATTR_AS4BYTE_NEW;
		goto optattr;
	default:
		/* unknown well-known attributes are a fatal error */
		if ((flags & ATTR_OPTIONAL) == 0) {
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_UNKNWN_WK_ATTR,
			    op, len);
			return (-1);
		}
optattr:
		/* store optional attribute for later use / re-advertisement */
		if (attr_optadd(a, flags, type, p, attr_len) == -1) {
bad_list:
			rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST,
			    NULL, 0);
			return (-1);
		}

		plen += attr_len;
		break;
	}

	return (plen);
}
1667 #undef UPD_READ
1668 #undef CHECK_FLAGS
1669 
1670 u_int8_t
1671 rde_attr_missing(struct rde_aspath *a, int ebgp, u_int16_t nlrilen)
1672 {
1673 	/* ATTR_MP_UNREACH_NLRI may be sent alone */
1674 	if (nlrilen == 0 && a->flags & F_ATTR_MP_UNREACH &&
1675 	    (a->flags & F_ATTR_MP_REACH) == 0)
1676 		return (0);
1677 
1678 	if ((a->flags & F_ATTR_ORIGIN) == 0)
1679 		return (ATTR_ORIGIN);
1680 	if ((a->flags & F_ATTR_ASPATH) == 0)
1681 		return (ATTR_ASPATH);
1682 	if ((a->flags & F_ATTR_MP_REACH) == 0 &&
1683 	    (a->flags & F_ATTR_NEXTHOP) == 0)
1684 		return (ATTR_NEXTHOP);
1685 	if (!ebgp)
1686 		if ((a->flags & F_ATTR_LOCALPREF) == 0)
1687 			return (ATTR_LOCALPREF);
1688 	return (0);
1689 }
1690 
/*
 * Extract the nexthop from an MP_REACH_NLRI payload; data/len point just
 * past the AFI/SAFI header.  On success asp->nexthop is set (with an
 * extra reference held, see below) and the number of bytes consumed is
 * returned: the nexthop-length octet, the nexthop itself and the
 * reserved octet.  Returns -1 on malformed input.
 */
int
rde_get_mp_nexthop(u_char *data, u_int16_t len, u_int8_t aid,
    struct rde_aspath *asp)
{
	struct bgpd_addr	nexthop;
	u_int8_t		totlen, nhlen;

	if (len == 0)
		return (-1);

	/* first octet is the length of the nexthop field */
	nhlen = *data++;
	totlen = 1;
	len--;

	if (nhlen > len)
		return (-1);

	bzero(&nexthop, sizeof(nexthop));
	nexthop.aid = aid;
	switch (aid) {
	case AID_INET6:
		/*
		 * RFC2545 describes that there may be a link-local
		 * address carried in nexthop. Yikes!
		 * This is not only silly, it is wrong and we just ignore
		 * this link-local nexthop. The bgpd session doesn't run
		 * over the link-local address so why should all other
		 * traffic.
		 */
		if (nhlen != 16 && nhlen != 32) {
			log_warnx("bad multiprotocol nexthop, bad size");
			return (-1);
		}
		/* only the first (global) 16 bytes are used, see above */
		memcpy(&nexthop.v6.s6_addr, data, 16);
		break;
	case AID_VPN_IPv4:
		/*
		 * Neither RFC4364 nor RFC3107 specify the format of the
		 * nexthop in an explicit way. The quality of RFC went down
		 * the toilet the larger the the number got.
		 * RFC4364 is very confusing about VPN-IPv4 address and the
		 * VPN-IPv4 prefix that carries also a MPLS label.
		 * So the nexthop is a 12-byte address with a 64bit RD and
		 * an IPv4 address following. In the nexthop case the RD can
		 * be ignored.
		 * Since the nexthop has to be in the main IPv4 table just
		 * create an AID_INET nexthop. So we don't need to handle
		 * AID_VPN_IPv4 in nexthop and kroute.
		 */
		if (nhlen != 12) {
			log_warnx("bad multiprotocol nexthop, bad size");
			return (-1);
		}
		/* skip the 8-byte route distinguisher, keep the IPv4 part */
		data += sizeof(u_int64_t);
		nexthop.aid = AID_INET;
		memcpy(&nexthop.v4, data, sizeof(nexthop.v4));
		break;
	default:
		log_warnx("bad multiprotocol nexthop, bad AID");
		return (-1);
	}

	asp->nexthop = nexthop_get(&nexthop);
	/*
	 * lock the nexthop because it is not yet linked else
	 * withdraws may remove this nexthop which in turn would
	 * cause a use after free error.
	 */
	asp->nexthop->refcnt++;

	/* ignore reserved (old SNPA) field as per RFC4760 */
	totlen += nhlen + 1;
	data += nhlen + 1;

	return (totlen);
}
1767 
1768 int
1769 rde_update_extract_prefix(u_char *p, u_int16_t len, void *va,
1770     u_int8_t pfxlen, u_int8_t max)
1771 {
1772 	static u_char addrmask[] = {
1773 	    0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff };
1774 	u_char		*a = va;
1775 	int		 i;
1776 	u_int16_t	 plen = 0;
1777 
1778 	for (i = 0; pfxlen && i < max; i++) {
1779 		if (len <= plen)
1780 			return (-1);
1781 		if (pfxlen < 8) {
1782 			a[i] = *p++ & addrmask[pfxlen];
1783 			plen++;
1784 			break;
1785 		} else {
1786 			a[i] = *p++;
1787 			plen++;
1788 			pfxlen -= 8;
1789 		}
1790 	}
1791 	return (plen);
1792 }
1793 
1794 int
1795 rde_update_get_prefix(u_char *p, u_int16_t len, struct bgpd_addr *prefix,
1796     u_int8_t *prefixlen)
1797 {
1798 	u_int8_t	 pfxlen;
1799 	int		 plen;
1800 
1801 	if (len < 1)
1802 		return (-1);
1803 
1804 	pfxlen = *p++;
1805 	len--;
1806 
1807 	bzero(prefix, sizeof(struct bgpd_addr));
1808 	prefix->aid = AID_INET;
1809 	*prefixlen = pfxlen;
1810 
1811 	if ((plen = rde_update_extract_prefix(p, len, &prefix->v4, pfxlen,
1812 	    sizeof(prefix->v4))) == -1)
1813 		return (-1);
1814 
1815 	return (plen + 1);	/* pfxlen needs to be added */
1816 }
1817 
1818 int
1819 rde_update_get_prefix6(u_char *p, u_int16_t len, struct bgpd_addr *prefix,
1820     u_int8_t *prefixlen)
1821 {
1822 	int		plen;
1823 	u_int8_t	pfxlen;
1824 
1825 	if (len < 1)
1826 		return (-1);
1827 
1828 	pfxlen = *p++;
1829 	len--;
1830 
1831 	bzero(prefix, sizeof(struct bgpd_addr));
1832 	prefix->aid = AID_INET6;
1833 	*prefixlen = pfxlen;
1834 
1835 	if ((plen = rde_update_extract_prefix(p, len, &prefix->v6, pfxlen,
1836 	    sizeof(prefix->v6))) == -1)
1837 		return (-1);
1838 
1839 	return (plen + 1);	/* pfxlen needs to be added */
1840 }
1841 
/*
 * Parse one MPLS/VPN-IPv4 NLRI: a prefix length octet, an MPLS label
 * stack (3 bytes per label, terminated by the bottom-of-stack bit), an
 * 8-byte route distinguisher and finally the IPv4 prefix.  The wire
 * prefix length covers labels + RD + address, so pfxlen is reduced as
 * those components are consumed.  Returns bytes consumed or -1 on error.
 */
int
rde_update_get_vpn4(u_char *p, u_int16_t len, struct bgpd_addr *prefix,
    u_int8_t *prefixlen)
{
	int		 rv, done = 0;
	u_int8_t	 pfxlen;
	u_int16_t	 plen;	/* bytes consumed so far */

	if (len < 1)
		return (-1);

	memcpy(&pfxlen, p, 1);
	p += 1;
	plen = 1;

	bzero(prefix, sizeof(struct bgpd_addr));

	/* label stack */
	do {
		/* each label needs 3 buffer bytes and 24 prefix bits */
		if (len - plen < 3 || pfxlen < 3 * 8)
			return (-1);
		/* bound the stored stack so we cannot overflow labelstack */
		if (prefix->vpn4.labellen + 3U >
		    sizeof(prefix->vpn4.labelstack))
			return (-1);
		prefix->vpn4.labelstack[prefix->vpn4.labellen++] = *p++;
		prefix->vpn4.labelstack[prefix->vpn4.labellen++] = *p++;
		prefix->vpn4.labelstack[prefix->vpn4.labellen] = *p++;
		/* bottom-of-stack bit in the last octet ends the stack */
		if (prefix->vpn4.labelstack[prefix->vpn4.labellen] &
		    BGP_MPLS_BOS)
			done = 1;
		prefix->vpn4.labellen++;
		plen += 3;
		pfxlen -= 3 * 8;
	} while (!done);

	/* RD */
	if (len - plen < (int)sizeof(u_int64_t) ||
	    pfxlen < sizeof(u_int64_t) * 8)
		return (-1);
	/*
	 * NOTE(review): the RD is stored in wire (network) byte order --
	 * presumably it is only ever compared, never interpreted; verify.
	 */
	memcpy(&prefix->vpn4.rd, p, sizeof(u_int64_t));
	pfxlen -= sizeof(u_int64_t) * 8;
	p += sizeof(u_int64_t);
	plen += sizeof(u_int64_t);

	/* prefix */
	prefix->aid = AID_VPN_IPv4;
	*prefixlen = pfxlen;

	if ((rv = rde_update_extract_prefix(p, len, &prefix->vpn4.addr,
	    pfxlen, sizeof(prefix->vpn4.addr))) == -1)
		return (-1);

	return (plen + rv);
}
1896 
1897 void
1898 rde_update_err(struct rde_peer *peer, u_int8_t error, u_int8_t suberr,
1899     void *data, u_int16_t size)
1900 {
1901 	struct ibuf	*wbuf;
1902 
1903 	if ((wbuf = imsg_create(ibuf_se, IMSG_UPDATE_ERR, peer->conf.id, 0,
1904 	    size + sizeof(error) + sizeof(suberr))) == NULL)
1905 		fatal("imsg_create error");
1906 	if (imsg_add(wbuf, &error, sizeof(error)) == -1 ||
1907 	    imsg_add(wbuf, &suberr, sizeof(suberr)) == -1 ||
1908 	    imsg_add(wbuf, data, size) == -1)
1909 		fatal("imsg_add error");
1910 	imsg_close(ibuf_se, wbuf);
1911 	peer->state = PEER_ERR;
1912 }
1913 
1914 void
1915 rde_update_log(const char *message, u_int16_t rid,
1916     const struct rde_peer *peer, const struct bgpd_addr *next,
1917     const struct bgpd_addr *prefix, u_int8_t prefixlen)
1918 {
1919 	char		*l = NULL;
1920 	char		*n = NULL;
1921 	char		*p = NULL;
1922 
1923 	if (!(conf->log & BGPD_LOG_UPDATES))
1924 		return;
1925 
1926 	if (next != NULL)
1927 		if (asprintf(&n, " via %s", log_addr(next)) == -1)
1928 			n = NULL;
1929 	if (asprintf(&p, "%s/%u", log_addr(prefix), prefixlen) == -1)
1930 		p = NULL;
1931 	l = log_fmt_peer(&peer->conf);
1932 	log_info("Rib %s: %s AS%s: %s %s%s", ribs[rid].name,
1933 	    l, log_as(peer->conf.remote_as), message,
1934 	    p ? p : "out of memory", n ? n : "");
1935 
1936 	free(l);
1937 	free(n);
1938 	free(p);
1939 }
1940 
1941 /*
1942  * 4-Byte ASN helper function.
1943  * Two scenarios need to be considered:
1944  * - NEW session with NEW attributes present -> just remove the attributes
1945  * - OLD session with NEW attributes present -> try to merge them
1946  */
1947 void
1948 rde_as4byte_fixup(struct rde_peer *peer, struct rde_aspath *a)
1949 {
1950 	struct attr	*nasp, *naggr, *oaggr;
1951 	u_int32_t	 as;
1952 
1953 	/*
1954 	 * if either ATTR_AS4_AGGREGATOR or ATTR_AS4_PATH is present
1955 	 * try to fixup the attributes.
1956 	 * Do not fixup if F_ATTR_PARSE_ERR is set.
1957 	 */
1958 	if (!(a->flags & F_ATTR_AS4BYTE_NEW) || a->flags & F_ATTR_PARSE_ERR)
1959 		return;
1960 
1961 	/* first get the attributes */
1962 	nasp = attr_optget(a, ATTR_AS4_PATH);
1963 	naggr = attr_optget(a, ATTR_AS4_AGGREGATOR);
1964 
1965 	if (rde_as4byte(peer)) {
1966 		/* NEW session using 4-byte ASNs */
1967 		if (nasp) {
1968 			log_peer_warnx(&peer->conf, "uses 4-byte ASN "
1969 			    "but sent AS4_PATH attribute.");
1970 			attr_free(a, nasp);
1971 		}
1972 		if (naggr) {
1973 			log_peer_warnx(&peer->conf, "uses 4-byte ASN "
1974 			    "but sent AS4_AGGREGATOR attribute.");
1975 			attr_free(a, naggr);
1976 		}
1977 		return;
1978 	}
1979 	/* OLD session using 2-byte ASNs */
1980 	/* try to merge the new attributes into the old ones */
1981 	if ((oaggr = attr_optget(a, ATTR_AGGREGATOR))) {
1982 		memcpy(&as, oaggr->data, sizeof(as));
1983 		if (ntohl(as) != AS_TRANS) {
1984 			/* per RFC ignore AS4_PATH and AS4_AGGREGATOR */
1985 			if (nasp)
1986 				attr_free(a, nasp);
1987 			if (naggr)
1988 				attr_free(a, naggr);
1989 			return;
1990 		}
1991 		if (naggr) {
1992 			/* switch over to new AGGREGATOR */
1993 			attr_free(a, oaggr);
1994 			if (attr_optadd(a, ATTR_OPTIONAL | ATTR_TRANSITIVE,
1995 			    ATTR_AGGREGATOR, naggr->data, naggr->len))
1996 				fatalx("attr_optadd failed but impossible");
1997 		}
1998 	}
1999 	/* there is no need for AS4_AGGREGATOR any more */
2000 	if (naggr)
2001 		attr_free(a, naggr);
2002 
2003 	/* merge AS4_PATH with ASPATH */
2004 	if (nasp)
2005 		aspath_merge(a, nasp);
2006 }
2007 
2008 
2009 /*
2010  * route reflector helper function
2011  */
2012 void
2013 rde_reflector(struct rde_peer *peer, struct rde_aspath *asp)
2014 {
2015 	struct attr	*a;
2016 	u_int8_t	*p;
2017 	u_int16_t	 len;
2018 	u_int32_t	 id;
2019 
2020 	/* do not consider updates with parse errors */
2021 	if (asp->flags & F_ATTR_PARSE_ERR)
2022 		return;
2023 
2024 	/* check for originator id if eq router_id drop */
2025 	if ((a = attr_optget(asp, ATTR_ORIGINATOR_ID)) != NULL) {
2026 		if (memcmp(&conf->bgpid, a->data, sizeof(conf->bgpid)) == 0) {
2027 			/* this is coming from myself */
2028 			asp->flags |= F_ATTR_LOOP;
2029 			return;
2030 		}
2031 	} else if (conf->flags & BGPD_FLAG_REFLECTOR) {
2032 		if (peer->conf.ebgp == 0)
2033 			id = htonl(peer->remote_bgpid);
2034 		else
2035 			id = conf->bgpid;
2036 		if (attr_optadd(asp, ATTR_OPTIONAL, ATTR_ORIGINATOR_ID,
2037 		    &id, sizeof(u_int32_t)) == -1)
2038 			fatalx("attr_optadd failed but impossible");
2039 	}
2040 
2041 	/* check for own id in the cluster list */
2042 	if (conf->flags & BGPD_FLAG_REFLECTOR) {
2043 		if ((a = attr_optget(asp, ATTR_CLUSTER_LIST)) != NULL) {
2044 			for (len = 0; len < a->len;
2045 			    len += sizeof(conf->clusterid))
2046 				/* check if coming from my cluster */
2047 				if (memcmp(&conf->clusterid, a->data + len,
2048 				    sizeof(conf->clusterid)) == 0) {
2049 					asp->flags |= F_ATTR_LOOP;
2050 					return;
2051 				}
2052 
2053 			/* prepend own clusterid by replacing attribute */
2054 			len = a->len + sizeof(conf->clusterid);
2055 			if (len < a->len)
2056 				fatalx("rde_reflector: cluster-list overflow");
2057 			if ((p = malloc(len)) == NULL)
2058 				fatal("rde_reflector");
2059 			memcpy(p, &conf->clusterid, sizeof(conf->clusterid));
2060 			memcpy(p + sizeof(conf->clusterid), a->data, a->len);
2061 			attr_free(asp, a);
2062 			if (attr_optadd(asp, ATTR_OPTIONAL, ATTR_CLUSTER_LIST,
2063 			    p, len) == -1)
2064 				fatalx("attr_optadd failed but impossible");
2065 			free(p);
2066 		} else if (attr_optadd(asp, ATTR_OPTIONAL, ATTR_CLUSTER_LIST,
2067 		    &conf->clusterid, sizeof(conf->clusterid)) == -1)
2068 			fatalx("attr_optadd failed but impossible");
2069 	}
2070 }
2071 
2072 /*
2073  * control specific functions
2074  */
/*
 * Fill a ctl_show_rib from prefix p / path asp and send it to the
 * control socket; pid identifies the requesting client.  With
 * F_CTL_DETAIL every optional attribute is additionally sent in its
 * own IMSG_CTL_SHOW_RIB_ATTR message.
 */
void
rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags)
{
	struct ctl_show_rib	 rib;
	struct ibuf		*wbuf;
	struct attr		*a;
	void			*bp;
	u_int8_t		 l;

	bzero(&rib, sizeof(rib));
	rib.lastchange = p->lastchange;
	rib.local_pref = asp->lpref;
	rib.med = asp->med;
	rib.prefix_cnt = asp->prefix_cnt;
	rib.active_cnt = asp->active_cnt;
	strlcpy(rib.descr, asp->peer->conf.descr, sizeof(rib.descr));
	memcpy(&rib.remote_addr, &asp->peer->remote_addr,
	    sizeof(rib.remote_addr));
	rib.remote_id = asp->peer->remote_bgpid;
	if (asp->nexthop != NULL) {
		memcpy(&rib.true_nexthop, &asp->nexthop->true_nexthop,
		    sizeof(rib.true_nexthop));
		memcpy(&rib.exit_nexthop, &asp->nexthop->exit_nexthop,
		    sizeof(rib.exit_nexthop));
	} else {
		/* announced network may have a NULL nexthop */
		bzero(&rib.true_nexthop, sizeof(rib.true_nexthop));
		bzero(&rib.exit_nexthop, sizeof(rib.exit_nexthop));
		rib.true_nexthop.aid = p->prefix->aid;
		rib.exit_nexthop.aid = p->prefix->aid;
	}
	pt_getaddr(p->prefix, &rib.prefix);
	rib.prefixlen = p->prefix->prefixlen;
	rib.origin = asp->origin;
	rib.flags = 0;
	if (p->rib->active == p)
		rib.flags |= F_PREF_ACTIVE;
	if (asp->peer->conf.ebgp == 0)
		rib.flags |= F_PREF_INTERNAL;
	if (asp->flags & F_PREFIX_ANNOUNCED)
		rib.flags |= F_PREF_ANNOUNCE;
	if (asp->nexthop == NULL || asp->nexthop->state == NEXTHOP_REACH)
		rib.flags |= F_PREF_ELIGIBLE;
	/* a routing loop overrides eligibility, so clear it last */
	if (asp->flags & F_ATTR_LOOP)
		rib.flags &= ~F_PREF_ELIGIBLE;
	rib.aspath_len = aspath_length(asp->aspath);

	/* message is the fixed header followed by the raw AS path */
	if ((wbuf = imsg_create(ibuf_se_ctl, IMSG_CTL_SHOW_RIB, 0, pid,
	    sizeof(rib) + rib.aspath_len)) == NULL)
		return;
	/* NOTE(review): assumes imsg_add() frees wbuf on failure — confirm */
	if (imsg_add(wbuf, &rib, sizeof(rib)) == -1 ||
	    imsg_add(wbuf, aspath_dump(asp->aspath),
	    rib.aspath_len) == -1)
		return;
	imsg_close(ibuf_se_ctl, wbuf);

	if (flags & F_CTL_DETAIL)
		for (l = 0; l < asp->others_len; l++) {
			/* a NULL slot terminates the attribute list */
			if ((a = asp->others[l]) == NULL)
				break;
			if ((wbuf = imsg_create(ibuf_se_ctl,
			    IMSG_CTL_SHOW_RIB_ATTR, 0, pid,
			    attr_optlen(a))) == NULL)
				return;
			if ((bp = ibuf_reserve(wbuf, attr_optlen(a))) == NULL) {
				ibuf_free(wbuf);
				return;
			}
			if (attr_write(bp, attr_optlen(a), a->flags,
			    a->type, a->data, a->len) == -1) {
				ibuf_free(wbuf);
				return;
			}
			imsg_close(ibuf_se_ctl, wbuf);
		}
}
2151 
/*
 * Run prefix p through the outbound filters for peer and, if the
 * result is ACTION_ALLOW, dump the (possibly filter-modified) path to
 * the control client described by req.  Used for Adj-RIB-Out views.
 */
void
rde_dump_filterout(struct rde_peer *peer, struct prefix *p,
    struct ctl_show_rib_request *req)
{
	struct bgpd_addr	 addr;
	struct rde_aspath	*asp;
	enum filter_actions	 a;

	/* skip prefixes that would not be announced to this peer at all */
	if (up_test_update(peer, p) != 1)
		return;

	pt_getaddr(p->prefix, &addr);
	a = rde_filter(1 /* XXX */, &asp, rules_l, peer, p->aspath, &addr,
	    p->prefix->prefixlen, p->aspath->peer, DIR_OUT);
	/* rde_filter may hand back a private modified copy in asp */
	if (asp)
		asp->peer = p->aspath->peer;
	else
		asp = p->aspath;

	if (a == ACTION_ALLOW)
		rde_dump_rib_as(p, asp, req->pid, req->flags);

	/* release the filter's private copy, never the original path */
	if (asp != p->aspath)
		path_put(asp);
}
2177 
/*
 * Dispatch one prefix of a control "show rib" request: apply the
 * request's peer/AS/community constraints and dump matching entries.
 * Adj-RIB-Out views are routed through the outbound filters.
 */
void
rde_dump_filter(struct prefix *p, struct ctl_show_rib_request *req)
{
	struct rde_peer		*peer;

	/* Adj-RIB-In or plain RIB view (neither IN nor OUT requested) */
	if (req->flags & F_CTL_ADJ_IN ||
	    !(req->flags & (F_CTL_ADJ_IN|F_CTL_ADJ_OUT))) {
		if (req->peerid && req->peerid != p->aspath->peer->conf.id)
			return;
		if (req->type == IMSG_CTL_SHOW_RIB_AS &&
		    !aspath_match(p->aspath->aspath, req->as.type, req->as.as))
			return;
		if (req->type == IMSG_CTL_SHOW_RIB_COMMUNITY &&
		    !community_match(p->aspath, req->community.as,
		    req->community.type))
			return;
		rde_dump_rib_as(p, p->aspath, req->pid, req->flags);
	} else if (req->flags & F_CTL_ADJ_OUT) {
		if (p->rib->active != p)
			/* only consider active prefix */
			return;
		if (req->peerid) {
			if ((peer = peer_get(req->peerid)) != NULL)
				rde_dump_filterout(peer, p, req);
			return;
		}
		/* NOTE(review): Adj-RIB-Out without a peerid dumps nothing */
	}
}
2206 
2207 void
2208 rde_dump_upcall(struct rib_entry *re, void *ptr)
2209 {
2210 	struct prefix		*p;
2211 	struct rde_dump_ctx	*ctx = ptr;
2212 
2213 	LIST_FOREACH(p, &re->prefix_h, rib_l)
2214 		rde_dump_filter(p, &ctx->req);
2215 }
2216 
/*
 * rib_dump callback for "show rib <prefix> or-longer": dump all
 * prefixes that are covered by the requested prefix/prefixlen.
 */
void
rde_dump_prefix_upcall(struct rib_entry *re, void *ptr)
{
	struct rde_dump_ctx	*ctx = ptr;
	struct prefix		*p;
	struct pt_entry		*pt;
	struct bgpd_addr	 addr;

	pt = re->prefix;
	pt_getaddr(pt, &addr);
	/* the address family must match the request */
	if (addr.aid != ctx->req.prefix.aid)
		return;
	/* the entry must be at least as specific as the request */
	if (ctx->req.prefixlen > pt->prefixlen)
		return;
	/* prefix_compare() == 0: addr lies within the requested prefix */
	if (!prefix_compare(&ctx->req.prefix, &addr, ctx->req.prefixlen))
		LIST_FOREACH(p, &re->prefix_h, rib_l)
			rde_dump_filter(p, &ctx->req);
}
2235 
/*
 * Set up a rib dump context for a control request and start the dump.
 * Exact-prefix requests are answered synchronously; everything else
 * is handed to rib_dump_r() for chunked, restartable processing.
 */
void
rde_dump_ctx_new(struct ctl_show_rib_request *req, pid_t pid,
    enum imsg_type type)
{
	struct rde_dump_ctx	*ctx;
	struct rib_entry	*re;
	u_int			 error;
	u_int16_t		 id;
	u_int8_t		 hostplen;

	if ((ctx = calloc(1, sizeof(*ctx))) == NULL) {
		log_warn("rde_dump_ctx_new");
		error = CTL_RES_NOMEM;
		imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error,
		    sizeof(error));
		return;
	}
	if ((id = rib_find(req->rib)) == RIB_FAILED) {
		log_warnx("rde_dump_ctx_new: no such rib %s", req->rib);
		/* NOTE(review): NOSUCHPEER reported for a missing RIB —
		 * confirm there is no better-matching result code */
		error = CTL_RES_NOSUCHPEER;
		imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error,
		    sizeof(error));
		free(ctx);
		return;
	}

	memcpy(&ctx->req, req, sizeof(struct ctl_show_rib_request));
	ctx->req.pid = pid;
	ctx->req.type = type;
	ctx->ribctx.ctx_count = RDE_RUNNER_ROUNDS;
	ctx->ribctx.ctx_rib = &ribs[id];
	switch (ctx->req.type) {
	case IMSG_CTL_SHOW_NETWORK:
		ctx->ribctx.ctx_upcall = network_dump_upcall;
		break;
	case IMSG_CTL_SHOW_RIB:
	case IMSG_CTL_SHOW_RIB_AS:
	case IMSG_CTL_SHOW_RIB_COMMUNITY:
		ctx->ribctx.ctx_upcall = rde_dump_upcall;
		break;
	case IMSG_CTL_SHOW_RIB_PREFIX:
		if (req->flags & F_LONGER) {
			/* or-longer needs a full walk of the RIB */
			ctx->ribctx.ctx_upcall = rde_dump_prefix_upcall;
			break;
		}
		/* exact match: answer directly without a walk */
		switch (req->prefix.aid) {
		case AID_INET:
		case AID_VPN_IPv4:
			hostplen = 32;
			break;
		case AID_INET6:
			hostplen = 128;
			break;
		default:
			fatalx("rde_dump_ctx_new: unknown af");
		}
		if (req->prefixlen == hostplen)
			/* host route: best-match lookup */
			re = rib_lookup(&ribs[id], &req->prefix);
		else
			re = rib_get(&ribs[id], &req->prefix, req->prefixlen);
		if (re)
			rde_dump_upcall(re, ctx);
		/* synchronous answer: send IMSG_CTL_END and free ctx now */
		rde_dump_done(ctx);
		return;
	default:
		fatalx("rde_dump_ctx_new: unsupported imsg type");
	}
	ctx->ribctx.ctx_done = rde_dump_done;
	ctx->ribctx.ctx_arg = ctx;
	ctx->ribctx.ctx_aid = ctx->req.aid;
	rib_dump_r(&ctx->ribctx);
}
2308 
/*
 * Dump completion callback: signal end-of-dump to the control client
 * and dispose of the dump context.
 */
void
rde_dump_done(void *arg)
{
	struct rde_dump_ctx	*ctx = arg;

	imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid,
	    -1, NULL, 0);
	free(ctx);
}
2318 
2319 void
2320 rde_dump_mrt_new(struct mrt *mrt, pid_t pid, int fd)
2321 {
2322 	struct rde_mrt_ctx	*ctx;
2323 	u_int16_t		 id;
2324 
2325 	if ((ctx = calloc(1, sizeof(*ctx))) == NULL) {
2326 		log_warn("rde_dump_mrt_new");
2327 		return;
2328 	}
2329 	memcpy(&ctx->mrt, mrt, sizeof(struct mrt));
2330 	TAILQ_INIT(&ctx->mrt.wbuf.bufs);
2331 	ctx->mrt.wbuf.fd = fd;
2332 	ctx->mrt.state = MRT_STATE_RUNNING;
2333 	id = rib_find(ctx->mrt.rib);
2334 	if (id == RIB_FAILED) {
2335 		log_warnx("non existing RIB %s for mrt dump", ctx->mrt.rib);
2336 		free(ctx);
2337 		return;
2338 	}
2339 	ctx->ribctx.ctx_count = RDE_RUNNER_ROUNDS;
2340 	ctx->ribctx.ctx_rib = &ribs[id];
2341 	ctx->ribctx.ctx_upcall = mrt_dump_upcall;
2342 	ctx->ribctx.ctx_done = mrt_done;
2343 	ctx->ribctx.ctx_arg = &ctx->mrt;
2344 	ctx->ribctx.ctx_aid = AID_UNSPEC;
2345 	LIST_INSERT_HEAD(&rde_mrts, ctx, entry);
2346 	rde_mrt_cnt++;
2347 	rib_dump_r(&ctx->ribctx);
2348 }
2349 
2350 /*
2351  * kroute specific functions
2352  */
2353 int
2354 rde_rdomain_import(struct rde_aspath *asp, struct rdomain *rd)
2355 {
2356 	struct filter_set	*s;
2357 
2358 	TAILQ_FOREACH(s, &rd->import, entry) {
2359 		if (community_ext_match(asp, &s->action.ext_community, 0))
2360 			return (1);
2361 	}
2362 	return (0);
2363 }
2364 
/*
 * Tell the parent process to update the FIB for the new/old prefix
 * pair: a change when new is usable, otherwise a delete of old.
 * Self-announced prefixes are never committed to the FIB.
 */
void
rde_send_kroute(struct prefix *new, struct prefix *old, u_int16_t ribid)
{
	struct kroute_full	 kr;
	struct bgpd_addr	 addr;
	struct prefix		*p;
	struct rdomain		*rd;
	enum imsg_type		 type;

	/*
	 * Make sure that self announce prefixes are not commited to the
	 * FIB. If both prefixes are unreachable no update is needed.
	 */
	if ((old == NULL || old->aspath->flags & F_PREFIX_ANNOUNCED) &&
	    (new == NULL || new->aspath->flags & F_PREFIX_ANNOUNCED))
		return;

	if (new == NULL || new->aspath->flags & F_PREFIX_ANNOUNCED) {
		type = IMSG_KROUTE_DELETE;
		p = old;
	} else {
		type = IMSG_KROUTE_CHANGE;
		p = new;
	}

	pt_getaddr(p->prefix, &addr);
	bzero(&kr, sizeof(kr));
	memcpy(&kr.prefix, &addr, sizeof(kr.prefix));
	kr.prefixlen = p->prefix->prefixlen;
	if (p->aspath->flags & F_NEXTHOP_REJECT)
		kr.flags |= F_REJECT;
	if (p->aspath->flags & F_NEXTHOP_BLACKHOLE)
		kr.flags |= F_BLACKHOLE;
	/* deletes are keyed by prefix only, no nexthop needed */
	if (type == IMSG_KROUTE_CHANGE)
		memcpy(&kr.nexthop, &p->aspath->nexthop->true_nexthop,
		    sizeof(kr.nexthop));
	strlcpy(kr.label, rtlabel_id2name(p->aspath->rtlabelid),
	    sizeof(kr.label));

	switch (addr.aid) {
	case AID_VPN_IPv4:
		if (ribid != 1)
			/* not Loc-RIB, no update for VPNs */
			break;

		/* fan the route out to every rdomain that imports it */
		SIMPLEQ_FOREACH(rd, rdomains_l, entry) {
			if (addr.vpn4.rd != rd->rd)
				continue;
			if (!rde_rdomain_import(p->aspath, rd))
				continue;
			/* must send exit_nexthop so that correct MPLS tunnel
			 * is chosen
			 */
			if (type == IMSG_KROUTE_CHANGE)
				memcpy(&kr.nexthop,
				    &p->aspath->nexthop->exit_nexthop,
				    sizeof(kr.nexthop));
			if (imsg_compose(ibuf_main, type, rd->rtableid, 0, -1,
			    &kr, sizeof(kr)) == -1)
				fatal("imsg_compose error");
		}
		break;
	default:
		/* plain route: target the routing table of this RIB */
		if (imsg_compose(ibuf_main, type, ribs[ribid].rtableid, 0, -1,
		    &kr, sizeof(kr)) == -1)
			fatal("imsg_compose error");
		break;
	}
}
2434 
2435 /*
2436  * pf table specific functions
2437  */
2438 void
2439 rde_send_pftable(u_int16_t id, struct bgpd_addr *addr,
2440     u_int8_t len, int del)
2441 {
2442 	struct pftable_msg pfm;
2443 
2444 	if (id == 0)
2445 		return;
2446 
2447 	/* do not run while cleaning up */
2448 	if (rde_quit)
2449 		return;
2450 
2451 	bzero(&pfm, sizeof(pfm));
2452 	strlcpy(pfm.pftable, pftable_id2name(id), sizeof(pfm.pftable));
2453 	memcpy(&pfm.addr, addr, sizeof(pfm.addr));
2454 	pfm.len = len;
2455 
2456 	if (imsg_compose(ibuf_main,
2457 	    del ? IMSG_PFTABLE_REMOVE : IMSG_PFTABLE_ADD,
2458 	    0, 0, -1, &pfm, sizeof(pfm)) == -1)
2459 		fatal("imsg_compose error");
2460 }
2461 
/*
 * Ask the parent process to commit all pending pf table changes in
 * one batch.
 */
void
rde_send_pftable_commit(void)
{
	/* do not run while cleaning up */
	if (rde_quit)
		return;

	if (imsg_compose(ibuf_main, IMSG_PFTABLE_COMMIT, 0, 0, -1, NULL, 0) ==
	    -1)
		fatal("imsg_compose error");
}
2473 
2474 /*
2475  * nexthop specific functions
2476  */
2477 void
2478 rde_send_nexthop(struct bgpd_addr *next, int valid)
2479 {
2480 	int			 type;
2481 
2482 	if (valid)
2483 		type = IMSG_NEXTHOP_ADD;
2484 	else
2485 		type = IMSG_NEXTHOP_REMOVE;
2486 
2487 	if (imsg_compose(ibuf_main, type, 0, 0, -1, next,
2488 	    sizeof(struct bgpd_addr)) == -1)
2489 		fatal("imsg_compose error");
2490 }
2491 
2492 /*
2493  * soft reconfig specific functions
2494  */
/*
 * rib_dump callback for a config reload: re-run the outbound filters
 * of every affected peer over the active prefix with both the old
 * (rules_l) and the new (newrules) rule set and emit whatever
 * update/withdraw is needed to move the peer to the new policy.
 */
void
rde_softreconfig_out(struct rib_entry *re, void *ptr)
{
	struct prefix		*p = re->active;
	struct pt_entry		*pt;
	struct rde_peer		*peer;
	struct rde_aspath	*oasp, *nasp;
	enum filter_actions	 oa, na;
	struct bgpd_addr	 addr;

	/* only the active prefix is ever announced */
	if (p == NULL)
		return;

	pt = re->prefix;
	pt_getaddr(pt, &addr);
	LIST_FOREACH(peer, &peerlist, peer_l) {
		if (peer->conf.id == 0)
			continue;
		if (peer->ribid != re->ribid)
			continue;
		if (peer->reconf_out == 0)
			continue;
		if (up_test_update(peer, p) != 1)
			continue;

		/* oa/oasp: old rule set; na/nasp: new rule set */
		oa = rde_filter(re->ribid, &oasp, rules_l, peer, p->aspath,
		    &addr, pt->prefixlen, p->aspath->peer, DIR_OUT);
		na = rde_filter(re->ribid, &nasp, newrules, peer, p->aspath,
		    &addr, pt->prefixlen, p->aspath->peer, DIR_OUT);
		oasp = oasp != NULL ? oasp : p->aspath;
		nasp = nasp != NULL ? nasp : p->aspath;

		if (oa == ACTION_DENY && na == ACTION_DENY)
			/* nothing todo */
			goto done;
		if (oa == ACTION_DENY && na == ACTION_ALLOW) {
			/* send update */
			up_generate(peer, nasp, &addr, pt->prefixlen);
			goto done;
		}
		if (oa == ACTION_ALLOW && na == ACTION_DENY) {
			/* send withdraw */
			up_generate(peer, NULL, &addr, pt->prefixlen);
			goto done;
		}
		if (oa == ACTION_ALLOW && na == ACTION_ALLOW) {
			/* still allowed: update only if the path changed */
			if (path_compare(nasp, oasp) == 0)
				goto done;
			/* send update */
			up_generate(peer, nasp, &addr, pt->prefixlen);
		}

done:
		/* release any private copies the filters produced */
		if (oasp != p->aspath)
			path_put(oasp);
		if (nasp != p->aspath)
			path_put(nasp);
	}
}
2554 
/*
 * rib_dump callback for a config reload: re-run the inbound filters
 * over every prefix of this entry with the old and the new rule set
 * and apply the resulting difference to each RIB that is kept.
 */
void
rde_softreconfig_in(struct rib_entry *re, void *ptr)
{
	struct prefix		*p, *np;
	struct pt_entry		*pt;
	struct rde_peer		*peer;
	struct rde_aspath	*asp, *oasp, *nasp;
	enum filter_actions	 oa, na;
	struct bgpd_addr	 addr;
	u_int16_t		 i;

	pt = re->prefix;
	pt_getaddr(pt, &addr);
	for (p = LIST_FIRST(&re->prefix_h); p != NULL; p = np) {
		/* grab the next pointer first, p may be modified below */
		np = LIST_NEXT(p, rib_l);

		/* store aspath as prefix may change till we're done */
		asp = p->aspath;
		peer = asp->peer;

		/* XXX how can this happen ??? */
		if (peer->reconf_in == 0)
			continue;

		for (i = 1; i < rib_size; i++) {
			/* only active ribs need a softreconfig rerun */
			if (ribs[i].state != RECONF_KEEP)
				continue;

			/* check if prefix changed */
			oa = rde_filter(i, &oasp, rules_l, peer, asp, &addr,
			    pt->prefixlen, peer, DIR_IN);
			na = rde_filter(i, &nasp, newrules, peer, asp, &addr,
			    pt->prefixlen, peer, DIR_IN);
			oasp = oasp != NULL ? oasp : asp;
			nasp = nasp != NULL ? nasp : asp;

			if (oa == ACTION_DENY && na == ACTION_DENY)
				/* nothing todo */
				goto done;
			if (oa == ACTION_DENY && na == ACTION_ALLOW) {
				/* update Local-RIB */
				path_update(&ribs[i], peer, nasp, &addr,
				    pt->prefixlen);
				goto done;
			}
			if (oa == ACTION_ALLOW && na == ACTION_DENY) {
				/* remove from Local-RIB */
				prefix_remove(&ribs[i], peer, &addr,
				    pt->prefixlen, 0);
				goto done;
			}
			if (oa == ACTION_ALLOW && na == ACTION_ALLOW) {
				/* still allowed: update only on path change */
				if (path_compare(nasp, oasp) == 0)
					goto done;
				/* send update */
				path_update(&ribs[i], peer, nasp, &addr,
				    pt->prefixlen);
			}

done:
			/* release private copies the filters produced */
			if (oasp != asp)
				path_put(oasp);
			if (nasp != asp)
				path_put(nasp);
		}
	}
}
2623 
/*
 * rib_dump callback: fill a newly configured RIB (passed via ptr)
 * with all prefixes that pass the new inbound rule set.
 */
void
rde_softreconfig_load(struct rib_entry *re, void *ptr)
{
	struct rib		*rib = ptr;
	struct prefix		*p, *np;
	struct pt_entry		*pt;
	struct rde_peer		*peer;
	struct rde_aspath	*asp, *nasp;
	enum filter_actions	 action;
	struct bgpd_addr	 addr;

	pt = re->prefix;
	pt_getaddr(pt, &addr);
	for (p = LIST_FIRST(&re->prefix_h); p != NULL; p = np) {
		/* grab the next pointer first, p may be modified below */
		np = LIST_NEXT(p, rib_l);

		/* store aspath as prefix may change till we're done */
		asp = p->aspath;
		peer = asp->peer;

		action = rde_filter(rib->id, &nasp, newrules, peer, asp, &addr,
		    pt->prefixlen, peer, DIR_IN);
		nasp = nasp != NULL ? nasp : asp;

		if (action == ACTION_ALLOW) {
			/* update Local-RIB */
			path_update(rib, peer, nasp, &addr, pt->prefixlen);
		}

		/* release the filter's private copy if one was made */
		if (nasp != asp)
			path_put(nasp);
	}
}
2657 
/*
 * rib_dump callback: announce to a single peer (passed via ptr) every
 * active prefix that the new outbound rule set allows.
 */
void
rde_softreconfig_load_peer(struct rib_entry *re, void *ptr)
{
	struct rde_peer		*peer = ptr;
	struct prefix		*p = re->active;
	struct pt_entry		*pt;
	struct rde_aspath	*nasp;
	enum filter_actions	 na;
	struct bgpd_addr	 addr;

	pt = re->prefix;
	pt_getaddr(pt, &addr);

	/* check if prefix was announced */
	/* NOTE(review): p may be NULL here (no active prefix) — assumes
	 * up_test_update() handles a NULL prefix, confirm */
	if (up_test_update(peer, p) != 1)
		return;

	na = rde_filter(re->ribid, &nasp, newrules, peer, p->aspath,
	    &addr, pt->prefixlen, p->aspath->peer, DIR_OUT);
	nasp = nasp != NULL ? nasp : p->aspath;

	if (na == ACTION_DENY)
		/* nothing todo */
		goto done;

	/* send update */
	up_generate(peer, nasp, &addr, pt->prefixlen);
done:
	/* release the filter's private copy if one was made */
	if (nasp != p->aspath)
		path_put(nasp);
}
2689 
/*
 * rib_dump callback: withdraw from a single peer (passed via ptr)
 * every active prefix that the old outbound rule set had allowed.
 */
void
rde_softreconfig_unload_peer(struct rib_entry *re, void *ptr)
{
	struct rde_peer		*peer = ptr;
	struct prefix		*p = re->active;
	struct pt_entry		*pt;
	struct rde_aspath	*oasp;
	enum filter_actions	 oa;
	struct bgpd_addr	 addr;

	pt = re->prefix;
	pt_getaddr(pt, &addr);

	/* check if prefix was announced */
	/* NOTE(review): p may be NULL here (no active prefix) — assumes
	 * up_test_update() handles a NULL prefix, confirm */
	if (up_test_update(peer, p) != 1)
		return;

	oa = rde_filter(re->ribid, &oasp, rules_l, peer, p->aspath,
	    &addr, pt->prefixlen, p->aspath->peer, DIR_OUT);
	oasp = oasp != NULL ? oasp : p->aspath;

	if (oa == ACTION_DENY)
		/* nothing todo */
		goto done;

	/* send withdraw */
	up_generate(peer, NULL, &addr, pt->prefixlen);
done:
	/* release the filter's private copy if one was made */
	if (oasp != p->aspath)
		path_put(oasp);
}
2721 
2722 /*
2723  * update specific functions
2724  */
u_char	queue_buf[4096];	/* shared scratch buffer for outgoing UPDATEs */
2726 
/*
 * rib_dump callback used when a session comes up: queue an update for
 * the active prefix of every RIB entry towards the peer (ptr).
 */
void
rde_up_dump_upcall(struct rib_entry *re, void *ptr)
{
	struct rde_peer		*peer = ptr;

	/* the dump must run on the peer's own RIB */
	if (re->ribid != peer->ribid)
		fatalx("King Bula: monstrous evil horror.");
	if (re->active == NULL)
		return;
	up_generate_updates(rules_l, peer, re->active, NULL);
}
2738 
2739 void
2740 rde_generate_updates(u_int16_t ribid, struct prefix *new, struct prefix *old)
2741 {
2742 	struct rde_peer			*peer;
2743 
2744 	/*
2745 	 * If old is != NULL we know it was active and should be removed.
2746 	 * If new is != NULL we know it is reachable and then we should
2747 	 * generate an update.
2748 	 */
2749 	if (old == NULL && new == NULL)
2750 		return;
2751 
2752 	LIST_FOREACH(peer, &peerlist, peer_l) {
2753 		if (peer->conf.id == 0)
2754 			continue;
2755 		if (peer->ribid != ribid)
2756 			continue;
2757 		if (peer->state != PEER_UP)
2758 			continue;
2759 		up_generate_updates(rules_l, peer, new, old);
2760 	}
2761 }
2762 
/*
 * Drain the per-peer IPv4 update queues: build UPDATE payloads
 * (2-byte withdrawn-routes length, withdrawn prefixes, then path
 * attributes and NLRI) and ship them to the session engine.  The
 * total work per call is bounded by RDE_RUNNER_ROUNDS messages.
 */
void
rde_update_queue_runner(void)
{
	struct rde_peer		*peer;
	int			 r, sent, max = RDE_RUNNER_ROUNDS, eor = 0;
	u_int16_t		 len, wd_len, wpos;

	len = sizeof(queue_buf) - MSGSIZE_HEADER;
	do {
		sent = 0;
		LIST_FOREACH(peer, &peerlist, peer_l) {
			if (peer->conf.id == 0)
				continue;
			if (peer->state != PEER_UP)
				continue;
			/* first withdraws */
			wpos = 2; /* reserve space for the length field */
			r = up_dump_prefix(queue_buf + wpos, len - wpos - 2,
			    &peer->withdraws[AID_INET], peer);
			wd_len = r;
			/* write withdraws length filed */
			wd_len = htons(wd_len);
			memcpy(queue_buf, &wd_len, 2);
			wpos += r;

			/* now bgp path attributes */
			r = up_dump_attrnlri(queue_buf + wpos, len - wpos,
			    peer);
			switch (r) {
			case -1:
				/* queue drained: End-of-RIB is due */
				eor = 1;
				/* wd_len is big-endian here; 0 is 0 anyway */
				if (wd_len == 0) {
					/* no withdraws queued just send EoR */
					peer_send_eor(peer, AID_INET);
					continue;
				}
				break;
			case 2:
				if (wd_len == 0) {
					/*
					 * No packet to send. No withdraws and
					 * no path attributes. Skip.
					 */
					continue;
				}
				/* FALLTHROUGH */
			default:
				wpos += r;
				break;
			}

			/* finally send message to SE */
			if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
			    0, -1, queue_buf, wpos) == -1)
				fatal("imsg_compose error");
			sent++;
			if (eor) {
				eor = 0;
				peer_send_eor(peer, AID_INET);
			}
		}
		max -= sent;
	} while (sent != 0 && max > 0);
}
2827 
/*
 * Drain the per-peer update queues for a multiprotocol AID: first all
 * MP_UNREACH_NLRI withdraws, then all MP_REACH_NLRI updates.  Each
 * phase is bounded by RDE_RUNNER_ROUNDS / 2 messages.
 */
void
rde_update6_queue_runner(u_int8_t aid)
{
	struct rde_peer		*peer;
	u_char			*b;
	int			 r, sent, max = RDE_RUNNER_ROUNDS / 2;
	u_int16_t		 len;

	/* first withdraws ... */
	do {
		sent = 0;
		LIST_FOREACH(peer, &peerlist, peer_l) {
			if (peer->conf.id == 0)
				continue;
			if (peer->state != PEER_UP)
				continue;
			len = sizeof(queue_buf) - MSGSIZE_HEADER;
			b = up_dump_mp_unreach(queue_buf, &len, peer, aid);

			/* NULL: no withdraws queued for this peer */
			if (b == NULL)
				continue;
			/* finally send message to SE */
			if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
			    0, -1, b, len) == -1)
				fatal("imsg_compose error");
			sent++;
		}
		max -= sent;
	} while (sent != 0 && max > 0);

	/* ... then updates */
	max = RDE_RUNNER_ROUNDS / 2;
	do {
		sent = 0;
		LIST_FOREACH(peer, &peerlist, peer_l) {
			if (peer->conf.id == 0)
				continue;
			if (peer->state != PEER_UP)
				continue;
			len = sizeof(queue_buf) - MSGSIZE_HEADER;
			r = up_dump_mp_reach(queue_buf, &len, peer, aid);
			switch (r) {
			case -2:
				/* nothing to send for this peer */
				continue;
			case -1:
				/* queue drained: send End-of-RIB marker */
				peer_send_eor(peer, aid);
				continue;
			default:
				/* r is the offset of the message start */
				b = queue_buf + r;
				break;
			}

			/* finally send message to SE */
			if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
			    0, -1, b, len) == -1)
				fatal("imsg_compose error");
			sent++;
		}
		max -= sent;
	} while (sent != 0 && max > 0);
}
2889 
2890 /*
2891  * generic helper function
2892  */
/* Return the locally configured AS number. */
u_int32_t
rde_local_as(void)
{
	return (conf->as);
}
2898 
/*
 * Return non-zero when the decision process should be skipped:
 * always during shutdown, otherwise when BGPD_FLAG_NO_EVALUATE
 * (route-collector mode) is configured.
 */
int
rde_noevaluate(void)
{
	/* do not run while cleaning up */
	if (rde_quit)
		return (1);

	return (conf->flags & BGPD_FLAG_NO_EVALUATE);
}
2908 
/* Return the decision-process tuning flags from the current config. */
int
rde_decisionflags(void)
{
	return (conf->flags & BGPD_FLAG_DECISION_MASK);
}
2914 
/* Return non-zero if the peer negotiated the 4-byte AS capability. */
int
rde_as4byte(struct rde_peer *peer)
{
	return (peer->capa.as4byte);
}
2920 
2921 /*
2922  * peer functions
2923  */
/* hash table of all peers, keyed by peer id */
struct peer_table {
	struct rde_peer_head	*peer_hashtbl;
	u_int32_t		 peer_hashmask;	/* table size - 1 */
} peertable;

/* bucket for peer id x; relies on the table size being a power of two */
#define PEER_HASH(x)		\
	&peertable.peer_hashtbl[(x) & peertable.peer_hashmask]
2931 
/*
 * Allocate and initialize the peer hash table (size rounded up to a
 * power of two) and register the internal "LOCAL" peer (peerself,
 * id 0) used for locally announced networks.
 */
void
peer_init(u_int32_t hashsize)
{
	struct peer_config pc;
	u_int32_t	 hs, i;

	/* round up to the next power of two for the hash mask */
	for (hs = 1; hs < hashsize; hs <<= 1)
		;
	peertable.peer_hashtbl = calloc(hs, sizeof(struct rde_peer_head));
	if (peertable.peer_hashtbl == NULL)
		fatal("peer_init");

	for (i = 0; i < hs; i++)
		LIST_INIT(&peertable.peer_hashtbl[i]);
	LIST_INIT(&peerlist);

	peertable.peer_hashmask = hs - 1;

	bzero(&pc, sizeof(pc));
	snprintf(pc.descr, sizeof(pc.descr), "LOCAL");

	peerself = peer_add(0, &pc);
	if (peerself == NULL)
		fatalx("peer_init add self");

	peerself->state = PEER_UP;
}
2959 
/*
 * Free the peer hash table; warns if any bucket still holds peers
 * (they should all have been removed by now).
 */
void
peer_shutdown(void)
{
	u_int32_t	i;

	for (i = 0; i <= peertable.peer_hashmask; i++)
		if (!LIST_EMPTY(&peertable.peer_hashtbl[i]))
			log_warnx("peer_free: free non-free table");

	free(peertable.peer_hashtbl);
}
2971 
2972 struct rde_peer *
2973 peer_get(u_int32_t id)
2974 {
2975 	struct rde_peer_head	*head;
2976 	struct rde_peer		*peer;
2977 
2978 	head = PEER_HASH(id);
2979 
2980 	LIST_FOREACH(peer, head, hash_l) {
2981 		if (peer->conf.id == id)
2982 			return (peer);
2983 	}
2984 	return (NULL);
2985 }
2986 
/*
 * Allocate a new peer for id/p_conf and link it into the hash table
 * and the global peer list.  Returns NULL if the id is already taken.
 */
struct rde_peer *
peer_add(u_int32_t id, struct peer_config *p_conf)
{
	struct rde_peer_head	*head;
	struct rde_peer		*peer;

	if (peer_get(id))
		return (NULL);

	peer = calloc(1, sizeof(struct rde_peer));
	if (peer == NULL)
		fatal("peer_add");

	LIST_INIT(&peer->path_h);
	memcpy(&peer->conf, p_conf, sizeof(struct peer_config));
	peer->remote_bgpid = 0;
	peer->ribid = rib_find(peer->conf.rib);
	peer->state = PEER_NONE;
	up_init(peer);

	head = PEER_HASH(id);

	LIST_INSERT_HEAD(head, peer, hash_l);
	LIST_INSERT_HEAD(&peerlist, peer, peer_l);

	return (peer);
}
3014 
3015 void
3016 peer_localaddrs(struct rde_peer *peer, struct bgpd_addr *laddr)
3017 {
3018 	struct ifaddrs	*ifap, *ifa, *match;
3019 
3020 	if (getifaddrs(&ifap) == -1)
3021 		fatal("getifaddrs");
3022 
3023 	for (match = ifap; match != NULL; match = match->ifa_next)
3024 		if (sa_cmp(laddr, match->ifa_addr) == 0)
3025 			break;
3026 
3027 	if (match == NULL)
3028 		fatalx("peer_localaddrs: local address not found");
3029 
3030 	for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
3031 		if (ifa->ifa_addr->sa_family == AF_INET &&
3032 		    strcmp(ifa->ifa_name, match->ifa_name) == 0) {
3033 			if (ifa->ifa_addr->sa_family ==
3034 			    match->ifa_addr->sa_family)
3035 				ifa = match;
3036 			sa2addr(ifa->ifa_addr, &peer->local_v4_addr);
3037 			break;
3038 		}
3039 	}
3040 	for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
3041 		if (ifa->ifa_addr->sa_family == AF_INET6 &&
3042 		    strcmp(ifa->ifa_name, match->ifa_name) == 0) {
3043 			/*
3044 			 * only accept global scope addresses except explicitly
3045 			 * specified.
3046 			 */
3047 			if (ifa->ifa_addr->sa_family ==
3048 			    match->ifa_addr->sa_family)
3049 				ifa = match;
3050 			else if (IN6_IS_ADDR_LINKLOCAL(
3051 			    &((struct sockaddr_in6 *)ifa->
3052 			    ifa_addr)->sin6_addr) ||
3053 			    IN6_IS_ADDR_SITELOCAL(
3054 			    &((struct sockaddr_in6 *)ifa->
3055 			    ifa_addr)->sin6_addr))
3056 				continue;
3057 			sa2addr(ifa->ifa_addr, &peer->local_v6_addr);
3058 			break;
3059 		}
3060 	}
3061 
3062 	freeifaddrs(ifap);
3063 }
3064 
3065 void
3066 peer_up(u_int32_t id, struct session_up *sup)
3067 {
3068 	struct rde_peer	*peer;
3069 	u_int8_t	 i;
3070 
3071 	peer = peer_get(id);
3072 	if (peer == NULL) {
3073 		log_warnx("peer_up: peer id %d already exists", id);
3074 		return;
3075 	}
3076 
3077 	if (peer->state != PEER_DOWN && peer->state != PEER_NONE)
3078 		fatalx("peer_up: bad state");
3079 	peer->remote_bgpid = ntohl(sup->remote_bgpid);
3080 	peer->short_as = sup->short_as;
3081 	memcpy(&peer->remote_addr, &sup->remote_addr,
3082 	    sizeof(peer->remote_addr));
3083 	memcpy(&peer->capa, &sup->capa, sizeof(peer->capa));
3084 
3085 	peer_localaddrs(peer, &sup->local_addr);
3086 
3087 	peer->state = PEER_UP;
3088 	up_init(peer);
3089 
3090 	if (rde_noevaluate())
3091 		/*
3092 		 * no need to dump the table to the peer, there are no active
3093 		 * prefixes anyway. This is a speed up hack.
3094 		 */
3095 		return;
3096 
3097 	for (i = 0; i < AID_MAX; i++) {
3098 		if (peer->capa.mp[i])
3099 			peer_dump(id, i);
3100 	}
3101 }
3102 
/*
 * Session to peer id went down: flush all paths learned from the
 * peer, commit the resulting pf table deletions and free the peer.
 */
void
peer_down(u_int32_t id)
{
	struct rde_peer		*peer;
	struct rde_aspath	*asp, *nasp;

	peer = peer_get(id);
	if (peer == NULL) {
		log_warnx("peer_down: unknown peer id %d", id);
		return;
	}
	peer->remote_bgpid = 0;
	peer->state = PEER_DOWN;
	up_down(peer);

	/* walk through per peer RIB list and remove all prefixes. */
	for (asp = LIST_FIRST(&peer->path_h); asp != NULL; asp = nasp) {
		/* grab the next pointer first, path_remove() frees asp */
		nasp = LIST_NEXT(asp, peer_l);
		path_remove(asp);
	}
	LIST_INIT(&peer->path_h);
	peer->prefix_cnt = 0;

	/* Deletions are performed in path_remove() */
	rde_send_pftable_commit();

	LIST_REMOVE(peer, hash_l);
	LIST_REMOVE(peer, peer_l);
	free(peer);
}
3133 
/*
 * Announce the table to a peer for one address family: either the
 * configured default route or a full RIB dump.  A graceful-restart
 * marker is queued afterwards if the peer announced the capability.
 */
void
peer_dump(u_int32_t id, u_int8_t aid)
{
	struct rde_peer		*peer;

	peer = peer_get(id);
	if (peer == NULL) {
		log_warnx("peer_dump: unknown peer id %d", id);
		return;
	}

	if (peer->conf.announce_type == ANNOUNCE_DEFAULT_ROUTE)
		up_generate_default(rules_l, peer, aid);
	else
		rib_dump(&ribs[peer->ribid], rde_up_dump_upcall, peer, aid);
	if (peer->capa.restart)
		up_generate_marker(peer, aid);
}
3152 
/*
 * End-of-RIB marker, RFC 4724.  For IPv4 this is an empty UPDATE;
 * for all other address families it is an UPDATE whose only content
 * is an empty MP_UNREACH_NLRI attribute for the afi/safi pair.
 */
void
peer_send_eor(struct rde_peer *peer, u_int8_t aid)
{
	u_int16_t	afi;
	u_int8_t	safi;

	if (aid == AID_INET) {
		/* 0 withdrawn-routes len + 0 path-attribute len */
		u_char null[4];

		bzero(&null, 4);
		if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
		    0, -1, &null, 4) == -1)
			fatal("imsg_compose error in peer_send_eor");
	} else {
		u_int16_t	i;
		u_char		buf[10];

		if (aid2afi(aid, &afi, &safi) == -1)
			fatalx("peer_send_eor: bad AID");

		/* hand-build the 10-byte UPDATE body */
		i = 0;	/* v4 withdrawn len */
		bcopy(&i, &buf[0], sizeof(i));
		i = htons(6);	/* path attr len */
		bcopy(&i, &buf[2], sizeof(i));
		buf[4] = ATTR_OPTIONAL;
		buf[5] = ATTR_MP_UNREACH_NLRI;
		buf[6] = 3;	/* withdrawn len */
		i = htons(afi);
		bcopy(&i, &buf[7], sizeof(i));
		buf[9] = safi;

		if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
		    0, -1, &buf, 10) == -1)
			fatal("imsg_compose error in peer_send_eor");
	}
}
3190 
3191 /*
3192  * network announcement stuff
3193  */
/*
 * Announce a locally configured network.  If the config names an
 * rdomain routing table, the prefix is first rewritten into an
 * AID_VPN_IPv4 prefix carrying the rdomain's RD and MPLS label.
 * flagstatic distinguishes static announcements from dynamic ones
 * (the latter are tagged F_ANN_DYNAMIC).
 */
void
network_add(struct network_config *nc, int flagstatic)
{
	struct rdomain		*rd;
	struct rde_aspath	*asp;
	struct filter_set_head	*vpnset = NULL;
	in_addr_t		 prefix4;
	u_int16_t		 i;

	if (nc->rtableid) {
		SIMPLEQ_FOREACH(rd, rdomains_l, entry) {
			if (rd->rtableid != nc->rtableid)
				continue;
			switch (nc->prefix.aid) {
			case AID_INET:
				/* rewrite into a VPNv4 prefix for this rd */
				prefix4 = nc->prefix.v4.s_addr;
				bzero(&nc->prefix, sizeof(nc->prefix));
				nc->prefix.aid = AID_VPN_IPv4;
				nc->prefix.vpn4.rd = rd->rd;
				nc->prefix.vpn4.addr.s_addr = prefix4;
				nc->prefix.vpn4.labellen = 3;
				/* pack the 20-bit MPLS label into 3 bytes,
				 * bottom-of-stack bit set */
				nc->prefix.vpn4.labelstack[0] =
				    (rd->label >> 12) & 0xff;
				nc->prefix.vpn4.labelstack[1] =
				    (rd->label >> 4) & 0xff;
				nc->prefix.vpn4.labelstack[2] =
				    (rd->label << 4) & 0xf0;
				nc->prefix.vpn4.labelstack[2] |= BGP_MPLS_BOS;
				vpnset = &rd->export;
				break;
			default:
				log_warnx("unable to VPNize prefix");
				filterset_free(&nc->attrset);
				return;
			}
		}
	}

	asp = path_get();
	asp->aspath = aspath_get(NULL, 0);
	asp->origin = ORIGIN_IGP;
	asp->flags = F_ATTR_ORIGIN | F_ATTR_ASPATH |
	    F_ATTR_LOCALPREF | F_PREFIX_ANNOUNCED;
	/* the nexthop is unset unless a default set overrides it */
	if (!flagstatic)
		asp->flags |= F_ANN_DYNAMIC;

	rde_apply_set(asp, &nc->attrset, nc->prefix.aid, peerself, peerself);
	if (vpnset)
		rde_apply_set(asp, vpnset, nc->prefix.aid, peerself, peerself);
	/* insert into every RIB; index 0 is skipped */
	for (i = 1; i < rib_size; i++)
		path_update(&ribs[i], peerself, asp, &nc->prefix,
		    nc->prefixlen);

	path_put(asp);
	filterset_free(&nc->attrset);
}
3251 
3252 void
3253 network_delete(struct network_config *nc, int flagstatic)
3254 {
3255 	struct rdomain	*rd;
3256 	in_addr_t	 prefix4;
3257 	u_int32_t	 flags = F_PREFIX_ANNOUNCED;
3258 	u_int32_t	 i;
3259 
3260 	if (!flagstatic)
3261 		flags |= F_ANN_DYNAMIC;
3262 
3263 	if (nc->rtableid) {
3264 		SIMPLEQ_FOREACH(rd, rdomains_l, entry) {
3265 			if (rd->rtableid != nc->rtableid)
3266 				continue;
3267 			switch (nc->prefix.aid) {
3268 			case AID_INET:
3269 				prefix4 = nc->prefix.v4.s_addr;
3270 				bzero(&nc->prefix, sizeof(nc->prefix));
3271 				nc->prefix.aid = AID_VPN_IPv4;
3272 				nc->prefix.vpn4.rd = rd->rd;
3273 				nc->prefix.vpn4.addr.s_addr = prefix4;
3274 				nc->prefix.vpn4.labellen = 3;
3275 				nc->prefix.vpn4.labelstack[0] =
3276 				    (rd->label >> 12) & 0xff;
3277 				nc->prefix.vpn4.labelstack[1] =
3278 				    (rd->label >> 4) & 0xff;
3279 				nc->prefix.vpn4.labelstack[2] =
3280 				    (rd->label << 4) & 0xf0;
3281 				nc->prefix.vpn4.labelstack[2] |= BGP_MPLS_BOS;
3282 				break;
3283 			default:
3284 				log_warnx("unable to VPNize prefix");
3285 				return;
3286 			}
3287 		}
3288 	}
3289 
3290 	for (i = rib_size - 1; i > 0; i--)
3291 		prefix_remove(&ribs[i], peerself, &nc->prefix, nc->prefixlen,
3292 		    flags);
3293 }
3294 
3295 void
3296 network_dump_upcall(struct rib_entry *re, void *ptr)
3297 {
3298 	struct prefix		*p;
3299 	struct kroute_full	 k;
3300 	struct bgpd_addr	 addr;
3301 	struct rde_dump_ctx	*ctx = ptr;
3302 
3303 	LIST_FOREACH(p, &re->prefix_h, rib_l) {
3304 		if (!(p->aspath->flags & F_PREFIX_ANNOUNCED))
3305 			continue;
3306 		pt_getaddr(p->prefix, &addr);
3307 
3308 		bzero(&k, sizeof(k));
3309 		memcpy(&k.prefix, &addr, sizeof(k.prefix));
3310 		if (p->aspath->nexthop == NULL ||
3311 		    p->aspath->nexthop->state != NEXTHOP_REACH)
3312 			k.nexthop.aid = k.prefix.aid;
3313 		else
3314 			memcpy(&k.nexthop, &p->aspath->nexthop->true_nexthop,
3315 			    sizeof(k.nexthop));
3316 		k.prefixlen = p->prefix->prefixlen;
3317 		k.flags = F_KERNEL;
3318 		if ((p->aspath->flags & F_ANN_DYNAMIC) == 0)
3319 			k.flags = F_STATIC;
3320 		if (imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NETWORK, 0,
3321 		    ctx->req.pid, -1, &k, sizeof(k)) == -1)
3322 			log_warnx("network_dump_upcall: "
3323 			    "imsg_compose error");
3324 	}
3325 }
3326 
3327 /* clean up */
3328 void
3329 rde_shutdown(void)
3330 {
3331 	struct rde_peer		*p;
3332 	struct filter_rule	*r;
3333 	u_int32_t		 i;
3334 
3335 	/*
3336 	 * the decision process is turned off if rde_quit = 1 and
3337 	 * rde_shutdown depends on this.
3338 	 */
3339 
3340 	/*
3341 	 * All peers go down
3342 	 */
3343 	for (i = 0; i <= peertable.peer_hashmask; i++)
3344 		while ((p = LIST_FIRST(&peertable.peer_hashtbl[i])) != NULL)
3345 			peer_down(p->conf.id);
3346 
3347 	/* free filters */
3348 	while ((r = TAILQ_FIRST(rules_l)) != NULL) {
3349 		TAILQ_REMOVE(rules_l, r, entry);
3350 		filterset_free(&r->set);
3351 		free(r);
3352 	}
3353 	free(rules_l);
3354 
3355 	nexthop_shutdown();
3356 	path_shutdown();
3357 	aspath_shutdown();
3358 	attr_shutdown();
3359 	pt_shutdown();
3360 	peer_shutdown();
3361 }
3362 
3363 int
3364 sa_cmp(struct bgpd_addr *a, struct sockaddr *b)
3365 {
3366 	struct sockaddr_in	*in_b;
3367 	struct sockaddr_in6	*in6_b;
3368 
3369 	if (aid2af(a->aid) != b->sa_family)
3370 		return (1);
3371 
3372 	switch (b->sa_family) {
3373 	case AF_INET:
3374 		in_b = (struct sockaddr_in *)b;
3375 		if (a->v4.s_addr != in_b->sin_addr.s_addr)
3376 			return (1);
3377 		break;
3378 	case AF_INET6:
3379 		in6_b = (struct sockaddr_in6 *)b;
3380 #ifdef __KAME__
3381 		/* directly stolen from sbin/ifconfig/ifconfig.c */
3382 		if (IN6_IS_ADDR_LINKLOCAL(&in6_b->sin6_addr)) {
3383 			in6_b->sin6_scope_id =
3384 			    ntohs(*(u_int16_t *)&in6_b->sin6_addr.s6_addr[2]);
3385 			in6_b->sin6_addr.s6_addr[2] =
3386 			    in6_b->sin6_addr.s6_addr[3] = 0;
3387 		}
3388 #endif
3389 		if (bcmp(&a->v6, &in6_b->sin6_addr,
3390 		    sizeof(struct in6_addr)))
3391 			return (1);
3392 		break;
3393 	default:
3394 		fatal("king bula sez: unknown address family");
3395 		/* NOTREACHED */
3396 	}
3397 
3398 	return (0);
3399 }
3400