xref: /openbsd-src/usr.sbin/bgpd/rde.c (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1 /*	$OpenBSD: rde.c,v 1.350 2016/09/03 16:22:17 renato Exp $ */
2 
3 /*
4  * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 #include <sys/socket.h>
21 #include <sys/time.h>
22 #include <sys/resource.h>
23 
24 #include <errno.h>
25 #include <ifaddrs.h>
26 #include <pwd.h>
27 #include <poll.h>
28 #include <signal.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33 #include <err.h>
34 
35 #include "bgpd.h"
36 #include "mrt.h"
37 #include "rde.h"
38 #include "session.h"
39 
40 #define PFD_PIPE_MAIN		0
41 #define PFD_PIPE_SESSION	1
42 #define PFD_PIPE_SESSION_CTL	2
43 #define PFD_PIPE_COUNT		3
44 
45 void		 rde_sighdlr(int);
46 void		 rde_dispatch_imsg_session(struct imsgbuf *);
47 void		 rde_dispatch_imsg_parent(struct imsgbuf *);
48 int		 rde_update_dispatch(struct imsg *);
49 void		 rde_update_update(struct rde_peer *, struct rde_aspath *,
50 		     struct bgpd_addr *, u_int8_t);
51 void		 rde_update_withdraw(struct rde_peer *, struct bgpd_addr *,
52 		     u_int8_t);
53 int		 rde_attr_parse(u_char *, u_int16_t, struct rde_peer *,
54 		     struct rde_aspath *, struct mpattr *);
55 int		 rde_attr_add(struct rde_aspath *, u_char *, u_int16_t);
56 u_int8_t	 rde_attr_missing(struct rde_aspath *, int, u_int16_t);
57 int		 rde_get_mp_nexthop(u_char *, u_int16_t, u_int8_t,
58 		     struct rde_aspath *);
59 int		 rde_update_extract_prefix(u_char *, u_int16_t, void *,
60 		     u_int8_t, u_int8_t);
61 int		 rde_update_get_prefix(u_char *, u_int16_t, struct bgpd_addr *,
62 		     u_int8_t *);
63 int		 rde_update_get_prefix6(u_char *, u_int16_t, struct bgpd_addr *,
64 		     u_int8_t *);
65 int		 rde_update_get_vpn4(u_char *, u_int16_t, struct bgpd_addr *,
66 		     u_int8_t *);
67 void		 rde_update_err(struct rde_peer *, u_int8_t , u_int8_t,
68 		     void *, u_int16_t);
69 void		 rde_update_log(const char *, u_int16_t,
70 		     const struct rde_peer *, const struct bgpd_addr *,
71 		     const struct bgpd_addr *, u_int8_t);
72 void		 rde_as4byte_fixup(struct rde_peer *, struct rde_aspath *);
73 void		 rde_reflector(struct rde_peer *, struct rde_aspath *);
74 
75 void		 rde_dump_rib_as(struct prefix *, struct rde_aspath *,pid_t,
76 		     int);
77 void		 rde_dump_filter(struct prefix *,
78 		     struct ctl_show_rib_request *);
79 void		 rde_dump_filterout(struct rde_peer *, struct prefix *,
80 		     struct ctl_show_rib_request *);
81 void		 rde_dump_upcall(struct rib_entry *, void *);
82 void		 rde_dump_prefix_upcall(struct rib_entry *, void *);
83 void		 rde_dump_ctx_new(struct ctl_show_rib_request *, pid_t,
84 		     enum imsg_type);
85 void		 rde_dump_mrt_new(struct mrt *, pid_t, int);
86 void		 rde_dump_done(void *);
87 
88 int		 rde_rdomain_import(struct rde_aspath *, struct rdomain *);
89 void		 rde_reload_done(void);
90 void		 rde_softreconfig_out(struct rib_entry *, void *);
91 void		 rde_softreconfig_in(struct rib_entry *, void *);
92 void		 rde_softreconfig_unload_peer(struct rib_entry *, void *);
93 void		 rde_up_dump_upcall(struct rib_entry *, void *);
94 void		 rde_update_queue_runner(void);
95 void		 rde_update6_queue_runner(u_int8_t);
96 
97 void		 peer_init(u_int32_t);
98 void		 peer_shutdown(void);
99 int		 peer_localaddrs(struct rde_peer *, struct bgpd_addr *);
100 struct rde_peer	*peer_add(u_int32_t, struct peer_config *);
101 struct rde_peer	*peer_get(u_int32_t);
102 void		 peer_up(u_int32_t, struct session_up *);
103 void		 peer_down(u_int32_t);
104 void		 peer_flush(struct rde_peer *, u_int8_t);
105 void		 peer_stale(u_int32_t, u_int8_t);
106 void		 peer_recv_eor(struct rde_peer *, u_int8_t);
107 void		 peer_dump(u_int32_t, u_int8_t);
108 void		 peer_send_eor(struct rde_peer *, u_int8_t);
109 
110 void		 network_add(struct network_config *, int);
111 void		 network_delete(struct network_config *, int);
112 void		 network_dump_upcall(struct rib_entry *, void *);
113 
114 void		 rde_shutdown(void);
115 int		 sa_cmp(struct bgpd_addr *, struct sockaddr *);
116 
117 volatile sig_atomic_t	 rde_quit = 0;
118 struct bgpd_config	*conf, *nconf;
119 time_t			 reloadtime;
120 struct rde_peer_head	 peerlist;
121 struct rde_peer		*peerself;
122 struct filter_head	*out_rules, *out_rules_tmp;
123 struct rdomain_head	*rdomains_l, *newdomains;
124 struct imsgbuf		*ibuf_se;
125 struct imsgbuf		*ibuf_se_ctl;
126 struct imsgbuf		*ibuf_main;
127 struct rde_memstats	 rdemem;
128 
129 struct rde_dump_ctx {
130 	struct rib_context		ribctx;
131 	struct ctl_show_rib_request	req;
132 	sa_family_t			af;
133 };
134 
135 struct rde_mrt_ctx {
136 	struct mrt		mrt;
137 	struct rib_context	ribctx;
138 	LIST_ENTRY(rde_mrt_ctx)	entry;
139 };
140 
141 LIST_HEAD(, rde_mrt_ctx) rde_mrts = LIST_HEAD_INITIALIZER(rde_mrts);
142 u_int rde_mrt_cnt;
143 
144 void
145 rde_sighdlr(int sig)
146 {
147 	switch (sig) {
148 	case SIGINT:
149 	case SIGTERM:
150 		rde_quit = 1;
151 		break;
152 	}
153 }
154 
155 u_int32_t	peerhashsize = 64;
156 u_int32_t	pathhashsize = 1024;
157 u_int32_t	attrhashsize = 512;
158 u_int32_t	nexthophashsize = 64;
159 
160 void
161 rde_main(int debug, int verbose)
162 {
163 	struct passwd		*pw;
164 	struct pollfd		*pfd = NULL;
165 	struct rde_mrt_ctx	*mctx, *xmctx;
166 	void			*newp;
167 	u_int			 pfd_elms = 0, i, j;
168 	int			 timeout;
169 	u_int8_t		 aid;
170 
171 	bgpd_process = PROC_RDE;
172 	log_procname = log_procnames[bgpd_process];
173 
174 	log_init(debug);
175 	log_verbose(verbose);
176 
177 	if ((pw = getpwnam(BGPD_USER)) == NULL)
178 		fatal("getpwnam");
179 
180 	if (chroot(pw->pw_dir) == -1)
181 		fatal("chroot");
182 	if (chdir("/") == -1)
183 		fatal("chdir(\"/\")");
184 
185 	setproctitle("route decision engine");
186 
187 	if (setgroups(1, &pw->pw_gid) ||
188 	    setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) ||
189 	    setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
190 		fatal("can't drop privileges");
191 
192 	if (pledge("stdio route recvfd", NULL) == -1)
193 		fatal("pledge");
194 
195 	signal(SIGTERM, rde_sighdlr);
196 	signal(SIGINT, rde_sighdlr);
197 	signal(SIGPIPE, SIG_IGN);
198 	signal(SIGHUP, SIG_IGN);
199 	signal(SIGALRM, SIG_IGN);
200 	signal(SIGUSR1, SIG_IGN);
201 
202 	/* initialize the RIB structures */
203 	if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL)
204 		fatal(NULL);
205 	imsg_init(ibuf_main, 3);
206 
207 	pt_init();
208 	path_init(pathhashsize);
209 	aspath_init(pathhashsize);
210 	attr_init(attrhashsize);
211 	nexthop_init(nexthophashsize);
212 	peer_init(peerhashsize);
213 
214 	out_rules = calloc(1, sizeof(struct filter_head));
215 	if (out_rules == NULL)
216 		fatal(NULL);
217 	TAILQ_INIT(out_rules);
218 	rdomains_l = calloc(1, sizeof(struct rdomain_head));
219 	if (rdomains_l == NULL)
220 		fatal(NULL);
221 	SIMPLEQ_INIT(rdomains_l);
222 	if ((conf = calloc(1, sizeof(struct bgpd_config))) == NULL)
223 		fatal(NULL);
224 	log_info("route decision engine ready");
225 
226 	while (rde_quit == 0) {
227 		if (pfd_elms < PFD_PIPE_COUNT + rde_mrt_cnt) {
228 			if ((newp = reallocarray(pfd,
229 			    PFD_PIPE_COUNT + rde_mrt_cnt,
230 			    sizeof(struct pollfd))) == NULL) {
231 				/* panic for now  */
232 				log_warn("could not resize pfd from %u -> %u"
233 				    " entries", pfd_elms, PFD_PIPE_COUNT +
234 				    rde_mrt_cnt);
235 				fatalx("exiting");
236 			}
237 			pfd = newp;
238 			pfd_elms = PFD_PIPE_COUNT + rde_mrt_cnt;
239 		}
240 		timeout = INFTIM;
241 		bzero(pfd, sizeof(struct pollfd) * pfd_elms);
242 
243 		set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main);
244 		set_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se);
245 		set_pollfd(&pfd[PFD_PIPE_SESSION_CTL], ibuf_se_ctl);
246 
247 		if (rib_dump_pending() &&
248 		    ibuf_se_ctl && ibuf_se_ctl->w.queued == 0)
249 			timeout = 0;
250 
251 		i = PFD_PIPE_COUNT;
252 		for (mctx = LIST_FIRST(&rde_mrts); mctx != 0; mctx = xmctx) {
253 			xmctx = LIST_NEXT(mctx, entry);
254 			if (mctx->mrt.wbuf.queued) {
255 				pfd[i].fd = mctx->mrt.wbuf.fd;
256 				pfd[i].events = POLLOUT;
257 				i++;
258 			} else if (mctx->mrt.state == MRT_STATE_REMOVE) {
259 				close(mctx->mrt.wbuf.fd);
260 				LIST_REMOVE(&mctx->ribctx, entry);
261 				LIST_REMOVE(mctx, entry);
262 				free(mctx);
263 				rde_mrt_cnt--;
264 			}
265 		}
266 
267 		if (poll(pfd, i, timeout) == -1) {
268 			if (errno != EINTR)
269 				fatal("poll error");
270 			continue;
271 		}
272 
273 		if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1)
274 			fatalx("Lost connection to parent");
275 		else
276 			rde_dispatch_imsg_parent(ibuf_main);
277 
278 		if (handle_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se) == -1) {
279 			log_warnx("RDE: Lost connection to SE");
280 			msgbuf_clear(&ibuf_se->w);
281 			free(ibuf_se);
282 			ibuf_se = NULL;
283 		} else
284 			rde_dispatch_imsg_session(ibuf_se);
285 
286 		if (handle_pollfd(&pfd[PFD_PIPE_SESSION_CTL], ibuf_se_ctl) ==
287 		    -1) {
288 			log_warnx("RDE: Lost connection to SE control");
289 			msgbuf_clear(&ibuf_se_ctl->w);
290 			free(ibuf_se_ctl);
291 			ibuf_se_ctl = NULL;
292 		} else
293 			rde_dispatch_imsg_session(ibuf_se_ctl);
294 
295 		for (j = PFD_PIPE_COUNT, mctx = LIST_FIRST(&rde_mrts);
296 		    j < i && mctx != 0; j++) {
297 			if (pfd[j].fd == mctx->mrt.wbuf.fd &&
298 			    pfd[j].revents & POLLOUT)
299 				mrt_write(&mctx->mrt);
300 			mctx = LIST_NEXT(mctx, entry);
301 		}
302 
303 		rde_update_queue_runner();
304 		for (aid = AID_INET6; aid < AID_MAX; aid++)
305 			rde_update6_queue_runner(aid);
306 		if (rib_dump_pending() &&
307 		    ibuf_se_ctl && ibuf_se_ctl->w.queued <= 10)
308 			rib_dump_runner();
309 	}
310 
311 	/* close pipes */
312 	if (ibuf_se) {
313 		msgbuf_clear(&ibuf_se->w);
314 		close(ibuf_se->fd);
315 		free(ibuf_se);
316 	}
317 	if (ibuf_se_ctl) {
318 		msgbuf_clear(&ibuf_se_ctl->w);
319 		close(ibuf_se_ctl->fd);
320 		free(ibuf_se_ctl);
321 	}
322 	msgbuf_clear(&ibuf_main->w);
323 	close(ibuf_main->fd);
324 	free(ibuf_main);
325 
326 	/* do not clean up on shutdown on production, it takes ages. */
327 	if (debug)
328 		rde_shutdown();
329 
330 	while ((mctx = LIST_FIRST(&rde_mrts)) != NULL) {
331 		msgbuf_clear(&mctx->mrt.wbuf);
332 		close(mctx->mrt.wbuf.fd);
333 		LIST_REMOVE(&mctx->ribctx, entry);
334 		LIST_REMOVE(mctx, entry);
335 		free(mctx);
336 	}
337 
338 
339 	log_info("route decision engine exiting");
340 	exit(0);
341 }
342 
343 struct network_config	 netconf_s, netconf_p;
344 struct filter_set_head	*session_set, *parent_set;
345 
346 void
347 rde_dispatch_imsg_session(struct imsgbuf *ibuf)
348 {
349 	struct imsg		 imsg;
350 	struct peer		 p;
351 	struct peer_config	 pconf;
352 	struct session_up	 sup;
353 	struct ctl_show_rib	 csr;
354 	struct ctl_show_rib_request	req;
355 	struct rde_peer		*peer;
356 	struct rde_aspath	*asp;
357 	struct filter_set	*s;
358 	struct nexthop		*nh;
359 	u_int8_t		*asdata;
360 	ssize_t			 n;
361 	int			 verbose;
362 	u_int16_t		 len;
363 	u_int8_t		 aid;
364 
365 	while (ibuf) {
366 		if ((n = imsg_get(ibuf, &imsg)) == -1)
367 			fatal("rde_dispatch_imsg_session: imsg_get error");
368 		if (n == 0)
369 			break;
370 
371 		switch (imsg.hdr.type) {
372 		case IMSG_UPDATE:
373 			rde_update_dispatch(&imsg);
374 			break;
375 		case IMSG_SESSION_ADD:
376 			if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(pconf))
377 				fatalx("incorrect size of session request");
378 			memcpy(&pconf, imsg.data, sizeof(pconf));
379 			peer_add(imsg.hdr.peerid, &pconf);
380 			break;
381 		case IMSG_SESSION_UP:
382 			if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(sup))
383 				fatalx("incorrect size of session request");
384 			memcpy(&sup, imsg.data, sizeof(sup));
385 			peer_up(imsg.hdr.peerid, &sup);
386 			break;
387 		case IMSG_SESSION_DOWN:
388 			peer_down(imsg.hdr.peerid);
389 			break;
390 		case IMSG_SESSION_STALE:
391 			if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
392 				log_warnx("rde_dispatch: wrong imsg len");
393 				break;
394 			}
395 			memcpy(&aid, imsg.data, sizeof(aid));
396 			if (aid >= AID_MAX)
397 				fatalx("IMSG_SESSION_STALE: bad AID");
398 			peer_stale(imsg.hdr.peerid, aid);
399 			break;
400 		case IMSG_SESSION_FLUSH:
401 			if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
402 				log_warnx("rde_dispatch: wrong imsg len");
403 				break;
404 			}
405 			memcpy(&aid, imsg.data, sizeof(aid));
406 			if (aid >= AID_MAX)
407 				fatalx("IMSG_SESSION_FLUSH: bad AID");
408 			if ((peer = peer_get(imsg.hdr.peerid)) == NULL) {
409 				log_warnx("rde_dispatch: unknown peer id %d",
410 				    imsg.hdr.peerid);
411 				break;
412 			}
413 			peer_flush(peer, aid);
414 			break;
415 		case IMSG_SESSION_RESTARTED:
416 			if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
417 				log_warnx("rde_dispatch: wrong imsg len");
418 				break;
419 			}
420 			memcpy(&aid, imsg.data, sizeof(aid));
421 			if (aid >= AID_MAX)
422 				fatalx("IMSG_SESSION_RESTARTED: bad AID");
423 			if ((peer = peer_get(imsg.hdr.peerid)) == NULL) {
424 				log_warnx("rde_dispatch: unknown peer id %d",
425 				    imsg.hdr.peerid);
426 				break;
427 			}
428 			if (peer->staletime[aid])
429 				peer_flush(peer, aid);
430 			break;
431 		case IMSG_REFRESH:
432 			if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
433 				log_warnx("rde_dispatch: wrong imsg len");
434 				break;
435 			}
436 			memcpy(&aid, imsg.data, sizeof(aid));
437 			if (aid >= AID_MAX)
438 				fatalx("IMSG_REFRESH: bad AID");
439 			peer_dump(imsg.hdr.peerid, aid);
440 			break;
441 		case IMSG_NETWORK_ADD:
442 			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
443 			    sizeof(struct network_config)) {
444 				log_warnx("rde_dispatch: wrong imsg len");
445 				break;
446 			}
447 			memcpy(&netconf_s, imsg.data, sizeof(netconf_s));
448 			TAILQ_INIT(&netconf_s.attrset);
449 			session_set = &netconf_s.attrset;
450 			break;
451 		case IMSG_NETWORK_ASPATH:
452 			if (imsg.hdr.len - IMSG_HEADER_SIZE <
453 			    sizeof(struct ctl_show_rib)) {
454 				log_warnx("rde_dispatch: wrong imsg len");
455 				bzero(&netconf_s, sizeof(netconf_s));
456 				break;
457 			}
458 			asdata = imsg.data;
459 			asdata += sizeof(struct ctl_show_rib);
460 			memcpy(&csr, imsg.data, sizeof(csr));
461 			if (csr.aspath_len + sizeof(csr) > imsg.hdr.len -
462 			    IMSG_HEADER_SIZE) {
463 				log_warnx("rde_dispatch: wrong aspath len");
464 				bzero(&netconf_s, sizeof(netconf_s));
465 				break;
466 			}
467 			asp = path_get();
468 			asp->lpref = csr.local_pref;
469 			asp->med = csr.med;
470 			asp->weight = csr.weight;
471 			asp->flags = csr.flags;
472 			asp->origin = csr.origin;
473 			asp->flags |= F_PREFIX_ANNOUNCED | F_ANN_DYNAMIC;
474 			asp->aspath = aspath_get(asdata, csr.aspath_len);
475 			netconf_s.asp = asp;
476 			break;
477 		case IMSG_NETWORK_ATTR:
478 			if (imsg.hdr.len <= IMSG_HEADER_SIZE) {
479 				log_warnx("rde_dispatch: wrong imsg len");
480 				break;
481 			}
482 			/* parse path attributes */
483 			len = imsg.hdr.len - IMSG_HEADER_SIZE;
484 			asp = netconf_s.asp;
485 			if (rde_attr_add(asp, imsg.data, len) == -1) {
486 				log_warnx("rde_dispatch: bad network "
487 				    "attribute");
488 				path_put(asp);
489 				bzero(&netconf_s, sizeof(netconf_s));
490 				break;
491 			}
492 			break;
493 		case IMSG_NETWORK_DONE:
494 			if (imsg.hdr.len != IMSG_HEADER_SIZE) {
495 				log_warnx("rde_dispatch: wrong imsg len");
496 				break;
497 			}
498 			session_set = NULL;
499 			switch (netconf_s.prefix.aid) {
500 			case AID_INET:
501 				if (netconf_s.prefixlen > 32)
502 					goto badnet;
503 				network_add(&netconf_s, 0);
504 				break;
505 			case AID_INET6:
506 				if (netconf_s.prefixlen > 128)
507 					goto badnet;
508 				network_add(&netconf_s, 0);
509 				break;
510 			case 0:
511 				/* something failed beforehands */
512 				break;
513 			default:
514 badnet:
515 				log_warnx("rde_dispatch: bad network");
516 				break;
517 			}
518 			break;
519 		case IMSG_NETWORK_REMOVE:
520 			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
521 			    sizeof(struct network_config)) {
522 				log_warnx("rde_dispatch: wrong imsg len");
523 				break;
524 			}
525 			memcpy(&netconf_s, imsg.data, sizeof(netconf_s));
526 			TAILQ_INIT(&netconf_s.attrset);
527 			network_delete(&netconf_s, 0);
528 			break;
529 		case IMSG_NETWORK_FLUSH:
530 			if (imsg.hdr.len != IMSG_HEADER_SIZE) {
531 				log_warnx("rde_dispatch: wrong imsg len");
532 				break;
533 			}
534 			prefix_network_clean(peerself, time(NULL),
535 			    F_ANN_DYNAMIC);
536 			break;
537 		case IMSG_FILTER_SET:
538 			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
539 			    sizeof(struct filter_set)) {
540 				log_warnx("rde_dispatch: wrong imsg len");
541 				break;
542 			}
543 			if (session_set == NULL) {
544 				log_warnx("rde_dispatch: "
545 				    "IMSG_FILTER_SET unexpected");
546 				break;
547 			}
548 			if ((s = malloc(sizeof(struct filter_set))) == NULL)
549 				fatal(NULL);
550 			memcpy(s, imsg.data, sizeof(struct filter_set));
551 			TAILQ_INSERT_TAIL(session_set, s, entry);
552 
553 			if (s->type == ACTION_SET_NEXTHOP) {
554 				nh = nexthop_get(&s->action.nexthop);
555 				nh->refcnt++;
556 			}
557 			break;
558 		case IMSG_CTL_SHOW_NETWORK:
559 		case IMSG_CTL_SHOW_RIB:
560 		case IMSG_CTL_SHOW_RIB_AS:
561 		case IMSG_CTL_SHOW_RIB_COMMUNITY:
562 		case IMSG_CTL_SHOW_RIB_PREFIX:
563 			if (imsg.hdr.len != IMSG_HEADER_SIZE + sizeof(req)) {
564 				log_warnx("rde_dispatch: wrong imsg len");
565 				break;
566 			}
567 			memcpy(&req, imsg.data, sizeof(req));
568 			rde_dump_ctx_new(&req, imsg.hdr.pid, imsg.hdr.type);
569 			break;
570 		case IMSG_CTL_SHOW_NEIGHBOR:
571 			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
572 			    sizeof(struct peer)) {
573 				log_warnx("rde_dispatch: wrong imsg len");
574 				break;
575 			}
576 			memcpy(&p, imsg.data, sizeof(struct peer));
577 			peer = peer_get(p.conf.id);
578 			if (peer != NULL) {
579 				p.stats.prefix_cnt = peer->prefix_cnt;
580 				p.stats.prefix_rcvd_update =
581 				    peer->prefix_rcvd_update;
582 				p.stats.prefix_rcvd_withdraw =
583 				    peer->prefix_rcvd_withdraw;
584 				p.stats.prefix_rcvd_eor =
585 				    peer->prefix_rcvd_eor;
586 				p.stats.prefix_sent_update =
587 				    peer->prefix_sent_update;
588 				p.stats.prefix_sent_withdraw =
589 				    peer->prefix_sent_withdraw;
590 				p.stats.prefix_sent_eor =
591 				    peer->prefix_sent_eor;
592 			}
593 			imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NEIGHBOR, 0,
594 			    imsg.hdr.pid, -1, &p, sizeof(struct peer));
595 			break;
596 		case IMSG_CTL_END:
597 			imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, imsg.hdr.pid,
598 			    -1, NULL, 0);
599 			break;
600 		case IMSG_CTL_SHOW_RIB_MEM:
601 			imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_MEM, 0,
602 			    imsg.hdr.pid, -1, &rdemem, sizeof(rdemem));
603 			break;
604 		case IMSG_CTL_LOG_VERBOSE:
605 			/* already checked by SE */
606 			memcpy(&verbose, imsg.data, sizeof(verbose));
607 			log_verbose(verbose);
608 			break;
609 		default:
610 			break;
611 		}
612 		imsg_free(&imsg);
613 	}
614 }
615 
616 void
617 rde_dispatch_imsg_parent(struct imsgbuf *ibuf)
618 {
619 	static struct rdomain	*rd;
620 	struct imsg		 imsg;
621 	struct mrt		 xmrt;
622 	struct rde_rib		 rn;
623 	struct imsgbuf		*i;
624 	struct filter_head	*nr;
625 	struct filter_rule	*r;
626 	struct filter_set	*s;
627 	struct nexthop		*nh;
628 	int			 n, fd;
629 	u_int16_t		 rid;
630 
631 	while (ibuf) {
632 		if ((n = imsg_get(ibuf, &imsg)) == -1)
633 			fatal("rde_dispatch_imsg_parent: imsg_get error");
634 		if (n == 0)
635 			break;
636 
637 		switch (imsg.hdr.type) {
638 		case IMSG_SOCKET_CONN:
639 		case IMSG_SOCKET_CONN_CTL:
640 			if ((fd = imsg.fd) == -1) {
641 				log_warnx("expected to receive imsg fd to "
642 				    "SE but didn't receive any");
643 				break;
644 			}
645 			if ((i = malloc(sizeof(struct imsgbuf))) == NULL)
646 				fatal(NULL);
647 			imsg_init(i, fd);
648 			if (imsg.hdr.type == IMSG_SOCKET_CONN) {
649 				if (ibuf_se) {
650 					log_warnx("Unexpected imsg connection "
651 					    "to SE received");
652 					msgbuf_clear(&ibuf_se->w);
653 					free(ibuf_se);
654 				}
655 				ibuf_se = i;
656 			} else {
657 				if (ibuf_se_ctl) {
658 					log_warnx("Unexpected imsg ctl "
659 					    "connection to SE received");
660 					msgbuf_clear(&ibuf_se_ctl->w);
661 					free(ibuf_se_ctl);
662 				}
663 				ibuf_se_ctl = i;
664 			}
665 			break;
666 		case IMSG_NETWORK_ADD:
667 			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
668 			    sizeof(struct network_config)) {
669 				log_warnx("rde_dispatch: wrong imsg len");
670 				break;
671 			}
672 			memcpy(&netconf_p, imsg.data, sizeof(netconf_p));
673 			TAILQ_INIT(&netconf_p.attrset);
674 			parent_set = &netconf_p.attrset;
675 			break;
676 		case IMSG_NETWORK_DONE:
677 			parent_set = NULL;
678 			network_add(&netconf_p, 1);
679 			break;
680 		case IMSG_NETWORK_REMOVE:
681 			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
682 			    sizeof(struct network_config)) {
683 				log_warnx("rde_dispatch: wrong imsg len");
684 				break;
685 			}
686 			memcpy(&netconf_p, imsg.data, sizeof(netconf_p));
687 			TAILQ_INIT(&netconf_p.attrset);
688 			network_delete(&netconf_p, 1);
689 			break;
690 		case IMSG_RECONF_CONF:
691 			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
692 			    sizeof(struct bgpd_config))
693 				fatalx("IMSG_RECONF_CONF bad len");
694 			reloadtime = time(NULL);
695 			out_rules_tmp = calloc(1, sizeof(struct filter_head));
696 			if (out_rules_tmp == NULL)
697 				fatal(NULL);
698 			TAILQ_INIT(out_rules_tmp);
699 			newdomains = calloc(1, sizeof(struct rdomain_head));
700 			if (newdomains == NULL)
701 				fatal(NULL);
702 			SIMPLEQ_INIT(newdomains);
703 			if ((nconf = malloc(sizeof(struct bgpd_config))) ==
704 			    NULL)
705 				fatal(NULL);
706 			memcpy(nconf, imsg.data, sizeof(struct bgpd_config));
707 			for (rid = 0; rid < rib_size; rid++) {
708 				if (*ribs[rid].name == '\0')
709 					break;
710 				ribs[rid].state = RECONF_DELETE;
711 			}
712 			break;
713 		case IMSG_RECONF_RIB:
714 			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
715 			    sizeof(struct rde_rib))
716 				fatalx("IMSG_RECONF_RIB bad len");
717 			memcpy(&rn, imsg.data, sizeof(rn));
718 			rid = rib_find(rn.name);
719 			if (rid == RIB_FAILED)
720 				rib_new(rn.name, rn.rtableid, rn.flags);
721 			else if (ribs[rid].rtableid != rn.rtableid ||
722 			    (ribs[rid].flags & F_RIB_HASNOFIB) !=
723 			    (rn.flags & F_RIB_HASNOFIB)) {
724 				struct filter_head	*in_rules;
725 				/*
726 				 * Big hammer in the F_RIB_HASNOFIB case but
727 				 * not often enough used to optimise it more.
728 				 * Need to save the filters so that they're not
729 				 * lost.
730 				 */
731 				in_rules = ribs[rid].in_rules;
732 				ribs[rid].in_rules = NULL;
733 				rib_free(&ribs[rid]);
734 				rib_new(rn.name, rn.rtableid, rn.flags);
735 				ribs[rid].in_rules = in_rules;
736 			} else
737 				ribs[rid].state = RECONF_KEEP;
738 			break;
739 		case IMSG_RECONF_FILTER:
740 			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
741 			    sizeof(struct filter_rule))
742 				fatalx("IMSG_RECONF_FILTER bad len");
743 			if ((r = malloc(sizeof(struct filter_rule))) == NULL)
744 				fatal(NULL);
745 			memcpy(r, imsg.data, sizeof(struct filter_rule));
746 			TAILQ_INIT(&r->set);
747 			if ((r->peer.ribid = rib_find(r->rib)) == RIB_FAILED) {
748 				log_warnx("IMSG_RECONF_FILTER: filter rule "
749 				    "for nonexistent rib %s", r->rib);
750 				parent_set = NULL;
751 				free(r);
752 				break;
753 			}
754 			parent_set = &r->set;
755 			if (r->dir == DIR_IN) {
756 				nr = ribs[r->peer.ribid].in_rules_tmp;
757 				if (nr == NULL) {
758 					nr = calloc(1,
759 					    sizeof(struct filter_head));
760 					if (nr == NULL)
761 						fatal(NULL);
762 					TAILQ_INIT(nr);
763 					ribs[r->peer.ribid].in_rules_tmp = nr;
764 				}
765 				TAILQ_INSERT_TAIL(nr, r, entry);
766 			} else
767 				TAILQ_INSERT_TAIL(out_rules_tmp, r, entry);
768 			break;
769 		case IMSG_RECONF_RDOMAIN:
770 			if (imsg.hdr.len - IMSG_HEADER_SIZE !=
771 			    sizeof(struct rdomain))
772 				fatalx("IMSG_RECONF_RDOMAIN bad len");
773 			if ((rd = malloc(sizeof(struct rdomain))) == NULL)
774 				fatal(NULL);
775 			memcpy(rd, imsg.data, sizeof(struct rdomain));
776 			TAILQ_INIT(&rd->import);
777 			TAILQ_INIT(&rd->export);
778 			SIMPLEQ_INSERT_TAIL(newdomains, rd, entry);
779 			break;
780 		case IMSG_RECONF_RDOMAIN_EXPORT:
781 			if (rd == NULL) {
782 				log_warnx("rde_dispatch_imsg_parent: "
783 				    "IMSG_RECONF_RDOMAIN_EXPORT unexpected");
784 				break;
785 			}
786 			parent_set = &rd->export;
787 			break;
788 		case IMSG_RECONF_RDOMAIN_IMPORT:
789 			if (rd == NULL) {
790 				log_warnx("rde_dispatch_imsg_parent: "
791 				    "IMSG_RECONF_RDOMAIN_IMPORT unexpected");
792 				break;
793 			}
794 			parent_set = &rd->import;
795 			break;
796 		case IMSG_RECONF_RDOMAIN_DONE:
797 			parent_set = NULL;
798 			break;
799 		case IMSG_RECONF_DONE:
800 			if (nconf == NULL)
801 				fatalx("got IMSG_RECONF_DONE but no config");
802 			parent_set = NULL;
803 
804 			rde_reload_done();
805 			break;
806 		case IMSG_NEXTHOP_UPDATE:
807 			nexthop_update(imsg.data);
808 			break;
809 		case IMSG_FILTER_SET:
810 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
811 			    sizeof(struct filter_set))
812 				fatalx("IMSG_FILTER_SET bad len");
813 			if (parent_set == NULL) {
814 				log_warnx("rde_dispatch_imsg_parent: "
815 				    "IMSG_FILTER_SET unexpected");
816 				break;
817 			}
818 			if ((s = malloc(sizeof(struct filter_set))) == NULL)
819 				fatal(NULL);
820 			memcpy(s, imsg.data, sizeof(struct filter_set));
821 			TAILQ_INSERT_TAIL(parent_set, s, entry);
822 
823 			if (s->type == ACTION_SET_NEXTHOP) {
824 				nh = nexthop_get(&s->action.nexthop);
825 				nh->refcnt++;
826 			}
827 			break;
828 		case IMSG_MRT_OPEN:
829 		case IMSG_MRT_REOPEN:
830 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
831 			    sizeof(struct mrt)) {
832 				log_warnx("wrong imsg len");
833 				break;
834 			}
835 			memcpy(&xmrt, imsg.data, sizeof(xmrt));
836 			if ((fd = imsg.fd) == -1)
837 				log_warnx("expected to receive fd for mrt dump "
838 				    "but didn't receive any");
839 			else if (xmrt.type == MRT_TABLE_DUMP ||
840 			    xmrt.type == MRT_TABLE_DUMP_MP ||
841 			    xmrt.type == MRT_TABLE_DUMP_V2) {
842 				rde_dump_mrt_new(&xmrt, imsg.hdr.pid, fd);
843 			} else
844 				close(fd);
845 			break;
846 		case IMSG_MRT_CLOSE:
847 			/* ignore end message because a dump is atomic */
848 			break;
849 		default:
850 			break;
851 		}
852 		imsg_free(&imsg);
853 	}
854 }
855 
856 /* handle routing updates from the session engine. */
857 int
858 rde_update_dispatch(struct imsg *imsg)
859 {
860 	struct bgpd_addr	 prefix;
861 	struct mpattr		 mpa;
862 	struct rde_peer		*peer;
863 	struct rde_aspath	*asp = NULL;
864 	u_char			*p, *mpp = NULL;
865 	int			 error = -1, pos = 0;
866 	u_int16_t		 afi, len, mplen;
867 	u_int16_t		 withdrawn_len;
868 	u_int16_t		 attrpath_len;
869 	u_int16_t		 nlri_len;
870 	u_int8_t		 aid, prefixlen, safi, subtype;
871 	u_int32_t		 fas;
872 
873 	peer = peer_get(imsg->hdr.peerid);
874 	if (peer == NULL)	/* unknown peer, cannot happen */
875 		return (-1);
876 	if (peer->state != PEER_UP)
877 		return (-1);	/* peer is not yet up, cannot happen */
878 
879 	p = imsg->data;
880 
881 	if (imsg->hdr.len < IMSG_HEADER_SIZE + 2) {
882 		rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST, NULL, 0);
883 		return (-1);
884 	}
885 
886 	memcpy(&len, p, 2);
887 	withdrawn_len = ntohs(len);
888 	p += 2;
889 	if (imsg->hdr.len < IMSG_HEADER_SIZE + 2 + withdrawn_len + 2) {
890 		rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST, NULL, 0);
891 		return (-1);
892 	}
893 
894 	p += withdrawn_len;
895 	memcpy(&len, p, 2);
896 	attrpath_len = len = ntohs(len);
897 	p += 2;
898 	if (imsg->hdr.len <
899 	    IMSG_HEADER_SIZE + 2 + withdrawn_len + 2 + attrpath_len) {
900 		rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST, NULL, 0);
901 		return (-1);
902 	}
903 
904 	nlri_len =
905 	    imsg->hdr.len - IMSG_HEADER_SIZE - 4 - withdrawn_len - attrpath_len;
906 	bzero(&mpa, sizeof(mpa));
907 
908 	if (attrpath_len != 0) { /* 0 = no NLRI information in this message */
909 		/* parse path attributes */
910 		asp = path_get();
911 		while (len > 0) {
912 			if ((pos = rde_attr_parse(p, len, peer, asp,
913 			    &mpa)) < 0)
914 				goto done;
915 			p += pos;
916 			len -= pos;
917 		}
918 
919 		/* check for missing but necessary attributes */
920 		if ((subtype = rde_attr_missing(asp, peer->conf.ebgp,
921 		    nlri_len))) {
922 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_MISSNG_WK_ATTR,
923 			    &subtype, sizeof(u_int8_t));
924 			goto done;
925 		}
926 
927 		rde_as4byte_fixup(peer, asp);
928 
929 		/* enforce remote AS if requested */
930 		if (asp->flags & F_ATTR_ASPATH &&
931 		    peer->conf.enforce_as == ENFORCE_AS_ON) {
932 			fas = aspath_neighbor(asp->aspath);
933 			if (peer->conf.remote_as != fas) {
934 			    log_peer_warnx(&peer->conf, "bad path, "
935 				"starting with %s, "
936 				"enforce neighbor-as enabled", log_as(fas));
937 			    rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH,
938 				    NULL, 0);
939 			    goto done;
940 			}
941 		}
942 
943 		rde_reflector(peer, asp);
944 	}
945 
946 	p = imsg->data;
947 	len = withdrawn_len;
948 	p += 2;
949 	/* withdraw prefix */
950 	while (len > 0) {
951 		if ((pos = rde_update_get_prefix(p, len, &prefix,
952 		    &prefixlen)) == -1) {
953 			/*
954 			 * the RFC does not mention what we should do in
955 			 * this case. Let's do the same as in the NLRI case.
956 			 */
957 			log_peer_warnx(&peer->conf, "bad withdraw prefix");
958 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK,
959 			    NULL, 0);
960 			goto done;
961 		}
962 		if (prefixlen > 32) {
963 			log_peer_warnx(&peer->conf, "bad withdraw prefix");
964 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK,
965 			    NULL, 0);
966 			goto done;
967 		}
968 
969 		p += pos;
970 		len -= pos;
971 
972 		if (peer->capa.mp[AID_INET] == 0) {
973 			log_peer_warnx(&peer->conf,
974 			    "bad withdraw, %s disabled", aid2str(AID_INET));
975 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
976 			    NULL, 0);
977 			goto done;
978 		}
979 
980 		rde_update_withdraw(peer, &prefix, prefixlen);
981 	}
982 
983 	if (attrpath_len == 0) {
984 		/* 0 = no NLRI information in this message */
985 		if (nlri_len != 0) {
986 			/* crap at end of update which should not be there */
987 			rde_update_err(peer, ERR_UPDATE,
988 			    ERR_UPD_ATTRLIST, NULL, 0);
989 			return (-1);
990 		}
991 		if (withdrawn_len == 0) {
992 			/* EoR marker */
993 			peer_recv_eor(peer, AID_INET);
994 		}
995 		return (0);
996 	}
997 
998 	/* withdraw MP_UNREACH_NLRI if available */
999 	if (mpa.unreach_len != 0) {
1000 		mpp = mpa.unreach;
1001 		mplen = mpa.unreach_len;
1002 		memcpy(&afi, mpp, 2);
1003 		mpp += 2;
1004 		mplen -= 2;
1005 		afi = ntohs(afi);
1006 		safi = *mpp++;
1007 		mplen--;
1008 
1009 		if (afi2aid(afi, safi, &aid) == -1) {
1010 			log_peer_warnx(&peer->conf,
1011 			    "bad AFI/SAFI pair in withdraw");
1012 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
1013 			    NULL, 0);
1014 			goto done;
1015 		}
1016 
1017 		if (peer->capa.mp[aid] == 0) {
1018 			log_peer_warnx(&peer->conf,
1019 			    "bad withdraw, %s disabled", aid2str(aid));
1020 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
1021 			    NULL, 0);
1022 			goto done;
1023 		}
1024 
1025 		if ((asp->flags & ~F_ATTR_MP_UNREACH) == 0 && mplen == 0) {
1026 			/* EoR marker */
1027 			peer_recv_eor(peer, aid);
1028 		}
1029 
1030 		switch (aid) {
1031 		case AID_INET6:
1032 			while (mplen > 0) {
1033 				if ((pos = rde_update_get_prefix6(mpp, mplen,
1034 				    &prefix, &prefixlen)) == -1) {
1035 					log_peer_warnx(&peer->conf,
1036 					    "bad IPv6 withdraw prefix");
1037 					rde_update_err(peer, ERR_UPDATE,
1038 					    ERR_UPD_OPTATTR,
1039 					    mpa.unreach, mpa.unreach_len);
1040 					goto done;
1041 				}
1042 				if (prefixlen > 128) {
1043 					log_peer_warnx(&peer->conf,
1044 					    "bad IPv6 withdraw prefix");
1045 					rde_update_err(peer, ERR_UPDATE,
1046 					    ERR_UPD_OPTATTR,
1047 					    mpa.unreach, mpa.unreach_len);
1048 					goto done;
1049 				}
1050 
1051 				mpp += pos;
1052 				mplen -= pos;
1053 
1054 				rde_update_withdraw(peer, &prefix, prefixlen);
1055 			}
1056 			break;
1057 		case AID_VPN_IPv4:
1058 			while (mplen > 0) {
1059 				if ((pos = rde_update_get_vpn4(mpp, mplen,
1060 				    &prefix, &prefixlen)) == -1) {
1061 					log_peer_warnx(&peer->conf,
1062 					    "bad VPNv4 withdraw prefix");
1063 					rde_update_err(peer, ERR_UPDATE,
1064 					    ERR_UPD_OPTATTR,
1065 					    mpa.unreach, mpa.unreach_len);
1066 					goto done;
1067 				}
1068 				if (prefixlen > 32) {
1069 					log_peer_warnx(&peer->conf,
1070 					    "bad VPNv4 withdraw prefix");
1071 					rde_update_err(peer, ERR_UPDATE,
1072 					    ERR_UPD_OPTATTR,
1073 					    mpa.unreach, mpa.unreach_len);
1074 					goto done;
1075 				}
1076 
1077 				mpp += pos;
1078 				mplen -= pos;
1079 
1080 				rde_update_withdraw(peer, &prefix, prefixlen);
1081 			}
1082 			break;
1083 		default:
1084 			/* silently ignore unsupported multiprotocol AF */
1085 			break;
1086 		}
1087 
1088 		if ((asp->flags & ~F_ATTR_MP_UNREACH) == 0) {
1089 			error = 0;
1090 			goto done;
1091 		}
1092 	}
1093 
1094 	/* shift to NLRI information */
1095 	p += 2 + attrpath_len;
1096 
1097 	/* aspath needs to be loop free nota bene this is not a hard error */
1098 	if (peer->conf.ebgp && !aspath_loopfree(asp->aspath, conf->as))
1099 		asp->flags |= F_ATTR_LOOP;
1100 
1101 	/* parse nlri prefix */
1102 	while (nlri_len > 0) {
1103 		if ((pos = rde_update_get_prefix(p, nlri_len, &prefix,
1104 		    &prefixlen)) == -1) {
1105 			log_peer_warnx(&peer->conf, "bad nlri prefix");
1106 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK,
1107 			    NULL, 0);
1108 			goto done;
1109 		}
1110 		if (prefixlen > 32) {
1111 			log_peer_warnx(&peer->conf, "bad nlri prefix");
1112 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK,
1113 			    NULL, 0);
1114 			goto done;
1115 		}
1116 
1117 		p += pos;
1118 		nlri_len -= pos;
1119 
1120 		if (peer->capa.mp[AID_INET] == 0) {
1121 			log_peer_warnx(&peer->conf,
1122 			    "bad update, %s disabled", aid2str(AID_INET));
1123 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
1124 			    NULL, 0);
1125 			goto done;
1126 		}
1127 
1128 		rde_update_update(peer, asp, &prefix, prefixlen);
1129 
1130 		/* max prefix checker */
1131 		if (peer->conf.max_prefix &&
1132 		    peer->prefix_cnt >= peer->conf.max_prefix) {
1133 			log_peer_warnx(&peer->conf, "prefix limit reached"
1134 			    " (>%u/%u)", peer->prefix_cnt, peer->conf.max_prefix);
1135 			rde_update_err(peer, ERR_CEASE, ERR_CEASE_MAX_PREFIX,
1136 			    NULL, 0);
1137 			goto done;
1138 		}
1139 
1140 	}
1141 
1142 	/* add MP_REACH_NLRI if available */
1143 	if (mpa.reach_len != 0) {
1144 		mpp = mpa.reach;
1145 		mplen = mpa.reach_len;
1146 		memcpy(&afi, mpp, 2);
1147 		mpp += 2;
1148 		mplen -= 2;
1149 		afi = ntohs(afi);
1150 		safi = *mpp++;
1151 		mplen--;
1152 
1153 		if (afi2aid(afi, safi, &aid) == -1) {
1154 			log_peer_warnx(&peer->conf,
1155 			    "bad AFI/SAFI pair in update");
1156 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
1157 			    NULL, 0);
1158 			goto done;
1159 		}
1160 
1161 		if (peer->capa.mp[aid] == 0) {
1162 			log_peer_warnx(&peer->conf,
1163 			    "bad update, %s disabled", aid2str(aid));
1164 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
1165 			    NULL, 0);
1166 			goto done;
1167 		}
1168 
1169 		/*
1170 		 * this works because asp is not linked.
1171 		 * But first unlock the previously locked nexthop.
1172 		 */
1173 		if (asp->nexthop) {
1174 			asp->nexthop->refcnt--;
1175 			(void)nexthop_delete(asp->nexthop);
1176 			asp->nexthop = NULL;
1177 		}
1178 		if ((pos = rde_get_mp_nexthop(mpp, mplen, aid, asp)) == -1) {
1179 			log_peer_warnx(&peer->conf, "bad nlri prefix");
1180 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR,
1181 			    mpa.reach, mpa.reach_len);
1182 			goto done;
1183 		}
1184 		mpp += pos;
1185 		mplen -= pos;
1186 
1187 		switch (aid) {
1188 		case AID_INET6:
1189 			while (mplen > 0) {
1190 				if ((pos = rde_update_get_prefix6(mpp, mplen,
1191 				    &prefix, &prefixlen)) == -1) {
1192 					log_peer_warnx(&peer->conf,
1193 					    "bad IPv6 nlri prefix");
1194 					rde_update_err(peer, ERR_UPDATE,
1195 					    ERR_UPD_OPTATTR,
1196 					    mpa.reach, mpa.reach_len);
1197 					goto done;
1198 				}
1199 				if (prefixlen > 128) {
1200 					rde_update_err(peer, ERR_UPDATE,
1201 					    ERR_UPD_OPTATTR,
1202 					    mpa.reach, mpa.reach_len);
1203 					goto done;
1204 				}
1205 
1206 				mpp += pos;
1207 				mplen -= pos;
1208 
1209 				rde_update_update(peer, asp, &prefix,
1210 				    prefixlen);
1211 
1212 				/* max prefix checker */
1213 				if (peer->conf.max_prefix &&
1214 				    peer->prefix_cnt >= peer->conf.max_prefix) {
1215 					log_peer_warnx(&peer->conf,
1216 					    "prefix limit reached"
1217 					    " (>%u/%u)", peer->prefix_cnt,
1218 					    peer->conf.max_prefix);
1219 					rde_update_err(peer, ERR_CEASE,
1220 					    ERR_CEASE_MAX_PREFIX, NULL, 0);
1221 					goto done;
1222 				}
1223 
1224 			}
1225 			break;
1226 		case AID_VPN_IPv4:
1227 			while (mplen > 0) {
1228 				if ((pos = rde_update_get_vpn4(mpp, mplen,
1229 				    &prefix, &prefixlen)) == -1) {
1230 					log_peer_warnx(&peer->conf,
1231 					    "bad VPNv4 nlri prefix");
1232 					rde_update_err(peer, ERR_UPDATE,
1233 					    ERR_UPD_OPTATTR,
1234 					    mpa.reach, mpa.reach_len);
1235 					goto done;
1236 				}
1237 				if (prefixlen > 32) {
1238 					rde_update_err(peer, ERR_UPDATE,
1239 					    ERR_UPD_OPTATTR,
1240 					    mpa.reach, mpa.reach_len);
1241 					goto done;
1242 				}
1243 
1244 				mpp += pos;
1245 				mplen -= pos;
1246 
1247 				rde_update_update(peer, asp, &prefix,
1248 				    prefixlen);
1249 
1250 				/* max prefix checker */
1251 				if (peer->conf.max_prefix &&
1252 				    peer->prefix_cnt >= peer->conf.max_prefix) {
1253 					log_peer_warnx(&peer->conf,
1254 					    "prefix limit reached"
1255 					    " (>%u/%u)", peer->prefix_cnt,
1256 					    peer->conf.max_prefix);
1257 					rde_update_err(peer, ERR_CEASE,
1258 					    ERR_CEASE_MAX_PREFIX, NULL, 0);
1259 					goto done;
1260 				}
1261 
1262 			}
1263 			break;
1264 		default:
1265 			/* silently ignore unsupported multiprotocol AF */
1266 			break;
1267 		}
1268 	}
1269 
1270 done:
1271 	if (attrpath_len != 0) {
1272 		/* unlock the previously locked entry */
1273 		if (asp->nexthop) {
1274 			asp->nexthop->refcnt--;
1275 			(void)nexthop_delete(asp->nexthop);
1276 		}
1277 		/* free allocated attribute memory that is no longer used */
1278 		path_put(asp);
1279 	}
1280 
1281 	return (error);
1282 }
1283 
1284 void
1285 rde_update_update(struct rde_peer *peer, struct rde_aspath *asp,
1286     struct bgpd_addr *prefix, u_int8_t prefixlen)
1287 {
1288 	struct rde_aspath	*fasp;
1289 	enum filter_actions	 action;
1290 	int			 r = 0, f = 0;
1291 	u_int16_t		 i;
1292 
1293 	peer->prefix_rcvd_update++;
1294 	/* add original path to the Adj-RIB-In */
1295 	if (peer->conf.softreconfig_in)
1296 		r += path_update(&ribs[0], peer, asp, prefix, prefixlen);
1297 
1298 	for (i = 1; i < rib_size; i++) {
1299 		if (*ribs[i].name == '\0')
1300 			break;
1301 		/* input filter */
1302 		action = rde_filter(ribs[i].in_rules, &fasp, peer, asp, prefix,
1303 		    prefixlen, peer);
1304 
1305 		if (fasp == NULL)
1306 			fasp = asp;
1307 
1308 		if (action == ACTION_ALLOW) {
1309 			rde_update_log("update", i, peer,
1310 			    &fasp->nexthop->exit_nexthop, prefix, prefixlen);
1311 			r += path_update(&ribs[i], peer, fasp, prefix,
1312 			    prefixlen);
1313 		} else if (prefix_remove(&ribs[i], peer, prefix, prefixlen,
1314 		    0)) {
1315 			rde_update_log("filtered withdraw", i, peer,
1316 			    NULL, prefix, prefixlen);
1317 			f++;
1318 		}
1319 
1320 		/* free modified aspath */
1321 		if (fasp != asp)
1322 			path_put(fasp);
1323 	}
1324 
1325 	if (r)
1326 		peer->prefix_cnt++;
1327 	else if (f)
1328 		peer->prefix_cnt--;
1329 }
1330 
1331 void
1332 rde_update_withdraw(struct rde_peer *peer, struct bgpd_addr *prefix,
1333     u_int8_t prefixlen)
1334 {
1335 	int r = 0;
1336 	u_int16_t i;
1337 
1338 	peer->prefix_rcvd_withdraw++;
1339 
1340 	for (i = rib_size - 1; ; i--) {
1341 		if (*ribs[i].name == '\0')
1342 			break;
1343 		if (prefix_remove(&ribs[i], peer, prefix, prefixlen, 0)) {
1344 			rde_update_log("withdraw", i, peer, NULL, prefix,
1345 			    prefixlen);
1346 			r++;
1347 		}
1348 		if (i == 0)
1349 			break;
1350 	}
1351 
1352 	if (r)
1353 		peer->prefix_cnt--;
1354 }
1355 
1356 /*
1357  * BGP UPDATE parser functions
1358  */
1359 
1360 /* attribute parser specific makros */
1361 #define UPD_READ(t, p, plen, n) \
1362 	do { \
1363 		memcpy(t, p, n); \
1364 		p += n; \
1365 		plen += n; \
1366 	} while (0)
1367 
1368 #define CHECK_FLAGS(s, t, m)	\
1369 	(((s) & ~(ATTR_DEFMASK | (m))) == (t))
1370 
1371 int
1372 rde_attr_parse(u_char *p, u_int16_t len, struct rde_peer *peer,
1373     struct rde_aspath *a, struct mpattr *mpa)
1374 {
1375 	struct bgpd_addr nexthop;
1376 	u_char		*op = p, *npath;
1377 	u_int32_t	 tmp32;
1378 	int		 error;
1379 	u_int16_t	 attr_len, nlen;
1380 	u_int16_t	 plen = 0;
1381 	u_int8_t	 flags;
1382 	u_int8_t	 type;
1383 	u_int8_t	 tmp8;
1384 
1385 	if (len < 3) {
1386 bad_len:
1387 		rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLEN, op, len);
1388 		return (-1);
1389 	}
1390 
1391 	UPD_READ(&flags, p, plen, 1);
1392 	UPD_READ(&type, p, plen, 1);
1393 
1394 	if (flags & ATTR_EXTLEN) {
1395 		if (len - plen < 2)
1396 			goto bad_len;
1397 		UPD_READ(&attr_len, p, plen, 2);
1398 		attr_len = ntohs(attr_len);
1399 	} else {
1400 		UPD_READ(&tmp8, p, plen, 1);
1401 		attr_len = tmp8;
1402 	}
1403 
1404 	if (len - plen < attr_len)
1405 		goto bad_len;
1406 
1407 	/* adjust len to the actual attribute size including header */
1408 	len = plen + attr_len;
1409 
1410 	switch (type) {
1411 	case ATTR_UNDEF:
1412 		/* ignore and drop path attributes with a type code of 0 */
1413 		plen += attr_len;
1414 		break;
1415 	case ATTR_ORIGIN:
1416 		if (attr_len != 1)
1417 			goto bad_len;
1418 
1419 		if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0)) {
1420 bad_flags:
1421 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRFLAGS,
1422 			    op, len);
1423 			return (-1);
1424 		}
1425 
1426 		UPD_READ(&a->origin, p, plen, 1);
1427 		if (a->origin > ORIGIN_INCOMPLETE) {
1428 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_ORIGIN,
1429 			    op, len);
1430 			return (-1);
1431 		}
1432 		if (a->flags & F_ATTR_ORIGIN)
1433 			goto bad_list;
1434 		a->flags |= F_ATTR_ORIGIN;
1435 		break;
1436 	case ATTR_ASPATH:
1437 		if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0))
1438 			goto bad_flags;
1439 		error = aspath_verify(p, attr_len, rde_as4byte(peer));
1440 		if (error == AS_ERR_SOFT) {
1441 			/*
1442 			 * soft errors like unexpected segment types are
1443 			 * not considered fatal and the path is just
1444 			 * marked invalid.
1445 			 */
1446 			a->flags |= F_ATTR_PARSE_ERR;
1447 			log_peer_warnx(&peer->conf, "bad ASPATH, "
1448 			    "path invalidated and prefix withdrawn");
1449 		} else if (error != 0) {
1450 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH,
1451 			    NULL, 0);
1452 			return (-1);
1453 		}
1454 		if (a->flags & F_ATTR_ASPATH)
1455 			goto bad_list;
1456 		if (rde_as4byte(peer)) {
1457 			npath = p;
1458 			nlen = attr_len;
1459 		} else
1460 			npath = aspath_inflate(p, attr_len, &nlen);
1461 		a->flags |= F_ATTR_ASPATH;
1462 		a->aspath = aspath_get(npath, nlen);
1463 		if (npath != p)
1464 			free(npath);
1465 		plen += attr_len;
1466 		break;
1467 	case ATTR_NEXTHOP:
1468 		if (attr_len != 4)
1469 			goto bad_len;
1470 		if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0))
1471 			goto bad_flags;
1472 		if (a->flags & F_ATTR_NEXTHOP)
1473 			goto bad_list;
1474 		a->flags |= F_ATTR_NEXTHOP;
1475 
1476 		bzero(&nexthop, sizeof(nexthop));
1477 		nexthop.aid = AID_INET;
1478 		UPD_READ(&nexthop.v4.s_addr, p, plen, 4);
1479 		/*
1480 		 * Check if the nexthop is a valid IP address. We consider
1481 		 * multicast and experimental addresses as invalid.
1482 		 */
1483 		tmp32 = ntohl(nexthop.v4.s_addr);
1484 		if (IN_MULTICAST(tmp32) || IN_BADCLASS(tmp32)) {
1485 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_NETWORK,
1486 			    op, len);
1487 			return (-1);
1488 		}
1489 		a->nexthop = nexthop_get(&nexthop);
1490 		/*
1491 		 * lock the nexthop because it is not yet linked else
1492 		 * withdraws may remove this nexthop which in turn would
1493 		 * cause a use after free error.
1494 		 */
1495 		a->nexthop->refcnt++;
1496 		break;
1497 	case ATTR_MED:
1498 		if (attr_len != 4)
1499 			goto bad_len;
1500 		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
1501 			goto bad_flags;
1502 		if (a->flags & F_ATTR_MED)
1503 			goto bad_list;
1504 		a->flags |= F_ATTR_MED;
1505 
1506 		UPD_READ(&tmp32, p, plen, 4);
1507 		a->med = ntohl(tmp32);
1508 		break;
1509 	case ATTR_LOCALPREF:
1510 		if (attr_len != 4)
1511 			goto bad_len;
1512 		if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0))
1513 			goto bad_flags;
1514 		if (peer->conf.ebgp) {
1515 			/* ignore local-pref attr on non ibgp peers */
1516 			plen += 4;
1517 			break;
1518 		}
1519 		if (a->flags & F_ATTR_LOCALPREF)
1520 			goto bad_list;
1521 		a->flags |= F_ATTR_LOCALPREF;
1522 
1523 		UPD_READ(&tmp32, p, plen, 4);
1524 		a->lpref = ntohl(tmp32);
1525 		break;
1526 	case ATTR_ATOMIC_AGGREGATE:
1527 		if (attr_len != 0)
1528 			goto bad_len;
1529 		if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0))
1530 			goto bad_flags;
1531 		goto optattr;
1532 	case ATTR_AGGREGATOR:
1533 		if ((!rde_as4byte(peer) && attr_len != 6) ||
1534 		    (rde_as4byte(peer) && attr_len != 8)) {
1535 			/*
1536 			 * ignore attribute in case of error as per
1537 			 * draft-ietf-idr-optional-transitive-00.txt
1538 			 * but only if partial bit is set
1539 			 */
1540 			if ((flags & ATTR_PARTIAL) == 0)
1541 				goto bad_len;
1542 			log_peer_warnx(&peer->conf, "bad AGGREGATOR, "
1543 			    "partial attribute ignored");
1544 			plen += attr_len;
1545 			break;
1546 		}
1547 		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
1548 		    ATTR_PARTIAL))
1549 			goto bad_flags;
1550 		if (!rde_as4byte(peer)) {
1551 			/* need to inflate aggregator AS to 4-byte */
1552 			u_char	t[8];
1553 			t[0] = t[1] = 0;
1554 			UPD_READ(&t[2], p, plen, 2);
1555 			UPD_READ(&t[4], p, plen, 4);
1556 			if (attr_optadd(a, flags, type, t,
1557 			    sizeof(t)) == -1)
1558 				goto bad_list;
1559 			break;
1560 		}
1561 		/* 4-byte ready server take the default route */
1562 		goto optattr;
1563 	case ATTR_COMMUNITIES:
1564 		if (attr_len % 4 != 0) {
1565 			/*
1566 			 * mark update as bad and withdraw all routes as per
1567 			 * draft-ietf-idr-optional-transitive-00.txt
1568 			 * but only if partial bit is set
1569 			 */
1570 			if ((flags & ATTR_PARTIAL) == 0)
1571 				goto bad_len;
1572 			a->flags |= F_ATTR_PARSE_ERR;
1573 			log_peer_warnx(&peer->conf, "bad COMMUNITIES, "
1574 			    "path invalidated and prefix withdrawn");
1575 		}
1576 		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
1577 		    ATTR_PARTIAL))
1578 			goto bad_flags;
1579 		goto optattr;
1580 	case ATTR_EXT_COMMUNITIES:
1581 		if (attr_len % 8 != 0) {
1582 			/*
1583 			 * mark update as bad and withdraw all routes as per
1584 			 * draft-ietf-idr-optional-transitive-00.txt
1585 			 * but only if partial bit is set
1586 			 */
1587 			if ((flags & ATTR_PARTIAL) == 0)
1588 				goto bad_len;
1589 			a->flags |= F_ATTR_PARSE_ERR;
1590 			log_peer_warnx(&peer->conf, "bad EXT_COMMUNITIES, "
1591 			    "path invalidated and prefix withdrawn");
1592 		}
1593 		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
1594 		    ATTR_PARTIAL))
1595 			goto bad_flags;
1596 		goto optattr;
1597 	case ATTR_ORIGINATOR_ID:
1598 		if (attr_len != 4)
1599 			goto bad_len;
1600 		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
1601 			goto bad_flags;
1602 		goto optattr;
1603 	case ATTR_CLUSTER_LIST:
1604 		if (attr_len % 4 != 0)
1605 			goto bad_len;
1606 		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
1607 			goto bad_flags;
1608 		goto optattr;
1609 	case ATTR_MP_REACH_NLRI:
1610 		if (attr_len < 4)
1611 			goto bad_len;
1612 		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
1613 			goto bad_flags;
1614 		/* the validity is checked in rde_update_dispatch() */
1615 		if (a->flags & F_ATTR_MP_REACH)
1616 			goto bad_list;
1617 		a->flags |= F_ATTR_MP_REACH;
1618 
1619 		mpa->reach = p;
1620 		mpa->reach_len = attr_len;
1621 		plen += attr_len;
1622 		break;
1623 	case ATTR_MP_UNREACH_NLRI:
1624 		if (attr_len < 3)
1625 			goto bad_len;
1626 		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0))
1627 			goto bad_flags;
1628 		/* the validity is checked in rde_update_dispatch() */
1629 		if (a->flags & F_ATTR_MP_UNREACH)
1630 			goto bad_list;
1631 		a->flags |= F_ATTR_MP_UNREACH;
1632 
1633 		mpa->unreach = p;
1634 		mpa->unreach_len = attr_len;
1635 		plen += attr_len;
1636 		break;
1637 	case ATTR_AS4_AGGREGATOR:
1638 		if (attr_len != 8) {
1639 			/* see ATTR_AGGREGATOR ... */
1640 			if ((flags & ATTR_PARTIAL) == 0)
1641 				goto bad_len;
1642 			log_peer_warnx(&peer->conf, "bad AS4_AGGREGATOR, "
1643 			    "partial attribute ignored");
1644 			plen += attr_len;
1645 			break;
1646 		}
1647 		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
1648 		    ATTR_PARTIAL))
1649 			goto bad_flags;
1650 		a->flags |= F_ATTR_AS4BYTE_NEW;
1651 		goto optattr;
1652 	case ATTR_AS4_PATH:
1653 		if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE,
1654 		    ATTR_PARTIAL))
1655 			goto bad_flags;
1656 		if ((error = aspath_verify(p, attr_len, 1)) != 0) {
1657 			/*
1658 			 * XXX RFC does not specify how to handle errors.
1659 			 * XXX Instead of dropping the session because of a
1660 			 * XXX bad path just mark the full update as having
1661 			 * XXX a parse error which makes the update no longer
1662 			 * XXX eligible and will not be considered for routing
1663 			 * XXX or redistribution.
1664 			 * XXX We follow draft-ietf-idr-optional-transitive
1665 			 * XXX by looking at the partial bit.
1666 			 * XXX Consider soft errors similar to a partial attr.
1667 			 */
1668 			if (flags & ATTR_PARTIAL || error == AS_ERR_SOFT) {
1669 				a->flags |= F_ATTR_PARSE_ERR;
1670 				log_peer_warnx(&peer->conf, "bad AS4_PATH, "
1671 				    "path invalidated and prefix withdrawn");
1672 				goto optattr;
1673 			} else {
1674 				rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH,
1675 				    NULL, 0);
1676 				return (-1);
1677 			}
1678 		}
1679 		a->flags |= F_ATTR_AS4BYTE_NEW;
1680 		goto optattr;
1681 	default:
1682 		if ((flags & ATTR_OPTIONAL) == 0) {
1683 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_UNKNWN_WK_ATTR,
1684 			    op, len);
1685 			return (-1);
1686 		}
1687 optattr:
1688 		if (attr_optadd(a, flags, type, p, attr_len) == -1) {
1689 bad_list:
1690 			rde_update_err(peer, ERR_UPDATE, ERR_UPD_ATTRLIST,
1691 			    NULL, 0);
1692 			return (-1);
1693 		}
1694 
1695 		plen += attr_len;
1696 		break;
1697 	}
1698 
1699 	return (plen);
1700 }
1701 
1702 int
1703 rde_attr_add(struct rde_aspath *a, u_char *p, u_int16_t len)
1704 {
1705 	u_int16_t	 attr_len;
1706 	u_int16_t	 plen = 0;
1707 	u_int8_t	 flags;
1708 	u_int8_t	 type;
1709 	u_int8_t	 tmp8;
1710 
1711 	if (a == NULL)		/* no aspath, nothing to do */
1712 		return (0);
1713 	if (len < 3)
1714 		return (-1);
1715 
1716 	UPD_READ(&flags, p, plen, 1);
1717 	UPD_READ(&type, p, plen, 1);
1718 
1719 	if (flags & ATTR_EXTLEN) {
1720 		if (len - plen < 2)
1721 			return (-1);
1722 		UPD_READ(&attr_len, p, plen, 2);
1723 		attr_len = ntohs(attr_len);
1724 	} else {
1725 		UPD_READ(&tmp8, p, plen, 1);
1726 		attr_len = tmp8;
1727 	}
1728 
1729 	if (len - plen < attr_len)
1730 		return (-1);
1731 
1732 	if (attr_optadd(a, flags, type, p, attr_len) == -1)
1733 		return (-1);
1734 	return (0);
1735 }
1736 
1737 #undef UPD_READ
1738 #undef CHECK_FLAGS
1739 
1740 u_int8_t
1741 rde_attr_missing(struct rde_aspath *a, int ebgp, u_int16_t nlrilen)
1742 {
1743 	/* ATTR_MP_UNREACH_NLRI may be sent alone */
1744 	if (nlrilen == 0 && a->flags & F_ATTR_MP_UNREACH &&
1745 	    (a->flags & F_ATTR_MP_REACH) == 0)
1746 		return (0);
1747 
1748 	if ((a->flags & F_ATTR_ORIGIN) == 0)
1749 		return (ATTR_ORIGIN);
1750 	if ((a->flags & F_ATTR_ASPATH) == 0)
1751 		return (ATTR_ASPATH);
1752 	if ((a->flags & F_ATTR_MP_REACH) == 0 &&
1753 	    (a->flags & F_ATTR_NEXTHOP) == 0)
1754 		return (ATTR_NEXTHOP);
1755 	if (!ebgp)
1756 		if ((a->flags & F_ATTR_LOCALPREF) == 0)
1757 			return (ATTR_LOCALPREF);
1758 	return (0);
1759 }
1760 
1761 int
1762 rde_get_mp_nexthop(u_char *data, u_int16_t len, u_int8_t aid,
1763     struct rde_aspath *asp)
1764 {
1765 	struct bgpd_addr	nexthop;
1766 	u_int8_t		totlen, nhlen;
1767 
1768 	if (len == 0)
1769 		return (-1);
1770 
1771 	nhlen = *data++;
1772 	totlen = 1;
1773 	len--;
1774 
1775 	if (nhlen > len)
1776 		return (-1);
1777 
1778 	bzero(&nexthop, sizeof(nexthop));
1779 	nexthop.aid = aid;
1780 	switch (aid) {
1781 	case AID_INET6:
1782 		/*
1783 		 * RFC2545 describes that there may be a link-local
1784 		 * address carried in nexthop. Yikes!
1785 		 * This is not only silly, it is wrong and we just ignore
1786 		 * this link-local nexthop. The bgpd session doesn't run
1787 		 * over the link-local address so why should all other
1788 		 * traffic.
1789 		 */
1790 		if (nhlen != 16 && nhlen != 32) {
1791 			log_warnx("bad multiprotocol nexthop, bad size");
1792 			return (-1);
1793 		}
1794 		memcpy(&nexthop.v6.s6_addr, data, 16);
1795 		break;
1796 	case AID_VPN_IPv4:
1797 		/*
1798 		 * Neither RFC4364 nor RFC3107 specify the format of the
1799 		 * nexthop in an explicit way. The quality of RFC went down
1800 		 * the toilet the larger the number got.
1801 		 * RFC4364 is very confusing about VPN-IPv4 address and the
1802 		 * VPN-IPv4 prefix that carries also a MPLS label.
1803 		 * So the nexthop is a 12-byte address with a 64bit RD and
1804 		 * an IPv4 address following. In the nexthop case the RD can
1805 		 * be ignored.
1806 		 * Since the nexthop has to be in the main IPv4 table just
1807 		 * create an AID_INET nexthop. So we don't need to handle
1808 		 * AID_VPN_IPv4 in nexthop and kroute.
1809 		 */
1810 		if (nhlen != 12) {
1811 			log_warnx("bad multiprotocol nexthop, bad size");
1812 			return (-1);
1813 		}
1814 		data += sizeof(u_int64_t);
1815 		nexthop.aid = AID_INET;
1816 		memcpy(&nexthop.v4, data, sizeof(nexthop.v4));
1817 		break;
1818 	default:
1819 		log_warnx("bad multiprotocol nexthop, bad AID");
1820 		return (-1);
1821 	}
1822 
1823 	asp->nexthop = nexthop_get(&nexthop);
1824 	/*
1825 	 * lock the nexthop because it is not yet linked else
1826 	 * withdraws may remove this nexthop which in turn would
1827 	 * cause a use after free error.
1828 	 */
1829 	asp->nexthop->refcnt++;
1830 
1831 	/* ignore reserved (old SNPA) field as per RFC4760 */
1832 	totlen += nhlen + 1;
1833 	data += nhlen + 1;
1834 
1835 	return (totlen);
1836 }
1837 
1838 int
1839 rde_update_extract_prefix(u_char *p, u_int16_t len, void *va,
1840     u_int8_t pfxlen, u_int8_t max)
1841 {
1842 	static u_char addrmask[] = {
1843 	    0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff };
1844 	u_char		*a = va;
1845 	int		 i;
1846 	u_int16_t	 plen = 0;
1847 
1848 	for (i = 0; pfxlen && i < max; i++) {
1849 		if (len <= plen)
1850 			return (-1);
1851 		if (pfxlen < 8) {
1852 			a[i] = *p++ & addrmask[pfxlen];
1853 			plen++;
1854 			break;
1855 		} else {
1856 			a[i] = *p++;
1857 			plen++;
1858 			pfxlen -= 8;
1859 		}
1860 	}
1861 	return (plen);
1862 }
1863 
1864 int
1865 rde_update_get_prefix(u_char *p, u_int16_t len, struct bgpd_addr *prefix,
1866     u_int8_t *prefixlen)
1867 {
1868 	u_int8_t	 pfxlen;
1869 	int		 plen;
1870 
1871 	if (len < 1)
1872 		return (-1);
1873 
1874 	pfxlen = *p++;
1875 	len--;
1876 
1877 	bzero(prefix, sizeof(struct bgpd_addr));
1878 	prefix->aid = AID_INET;
1879 	*prefixlen = pfxlen;
1880 
1881 	if ((plen = rde_update_extract_prefix(p, len, &prefix->v4, pfxlen,
1882 	    sizeof(prefix->v4))) == -1)
1883 		return (-1);
1884 
1885 	return (plen + 1);	/* pfxlen needs to be added */
1886 }
1887 
1888 int
1889 rde_update_get_prefix6(u_char *p, u_int16_t len, struct bgpd_addr *prefix,
1890     u_int8_t *prefixlen)
1891 {
1892 	int		plen;
1893 	u_int8_t	pfxlen;
1894 
1895 	if (len < 1)
1896 		return (-1);
1897 
1898 	pfxlen = *p++;
1899 	len--;
1900 
1901 	bzero(prefix, sizeof(struct bgpd_addr));
1902 	prefix->aid = AID_INET6;
1903 	*prefixlen = pfxlen;
1904 
1905 	if ((plen = rde_update_extract_prefix(p, len, &prefix->v6, pfxlen,
1906 	    sizeof(prefix->v6))) == -1)
1907 		return (-1);
1908 
1909 	return (plen + 1);	/* pfxlen needs to be added */
1910 }
1911 
1912 int
1913 rde_update_get_vpn4(u_char *p, u_int16_t len, struct bgpd_addr *prefix,
1914     u_int8_t *prefixlen)
1915 {
1916 	int		 rv, done = 0;
1917 	u_int8_t	 pfxlen;
1918 	u_int16_t	 plen;
1919 
1920 	if (len < 1)
1921 		return (-1);
1922 
1923 	memcpy(&pfxlen, p, 1);
1924 	p += 1;
1925 	plen = 1;
1926 
1927 	bzero(prefix, sizeof(struct bgpd_addr));
1928 
1929 	/* label stack */
1930 	do {
1931 		if (len - plen < 3 || pfxlen < 3 * 8)
1932 			return (-1);
1933 		if (prefix->vpn4.labellen + 3U >
1934 		    sizeof(prefix->vpn4.labelstack))
1935 			return (-1);
1936 		prefix->vpn4.labelstack[prefix->vpn4.labellen++] = *p++;
1937 		prefix->vpn4.labelstack[prefix->vpn4.labellen++] = *p++;
1938 		prefix->vpn4.labelstack[prefix->vpn4.labellen] = *p++;
1939 		if (prefix->vpn4.labelstack[prefix->vpn4.labellen] &
1940 		    BGP_MPLS_BOS)
1941 			done = 1;
1942 		prefix->vpn4.labellen++;
1943 		plen += 3;
1944 		pfxlen -= 3 * 8;
1945 	} while (!done);
1946 
1947 	/* RD */
1948 	if (len - plen < (int)sizeof(u_int64_t) ||
1949 	    pfxlen < sizeof(u_int64_t) * 8)
1950 		return (-1);
1951 	memcpy(&prefix->vpn4.rd, p, sizeof(u_int64_t));
1952 	pfxlen -= sizeof(u_int64_t) * 8;
1953 	p += sizeof(u_int64_t);
1954 	plen += sizeof(u_int64_t);
1955 
1956 	/* prefix */
1957 	prefix->aid = AID_VPN_IPv4;
1958 	*prefixlen = pfxlen;
1959 
1960 	if ((rv = rde_update_extract_prefix(p, len, &prefix->vpn4.addr,
1961 	    pfxlen, sizeof(prefix->vpn4.addr))) == -1)
1962 		return (-1);
1963 
1964 	return (plen + rv);
1965 }
1966 
1967 void
1968 rde_update_err(struct rde_peer *peer, u_int8_t error, u_int8_t suberr,
1969     void *data, u_int16_t size)
1970 {
1971 	struct ibuf	*wbuf;
1972 
1973 	if ((wbuf = imsg_create(ibuf_se, IMSG_UPDATE_ERR, peer->conf.id, 0,
1974 	    size + sizeof(error) + sizeof(suberr))) == NULL)
1975 		fatal("%s %d imsg_create error", __func__, __LINE__);
1976 	if (imsg_add(wbuf, &error, sizeof(error)) == -1 ||
1977 	    imsg_add(wbuf, &suberr, sizeof(suberr)) == -1 ||
1978 	    imsg_add(wbuf, data, size) == -1)
1979 		fatal("%s %d imsg_add error", __func__, __LINE__);
1980 	imsg_close(ibuf_se, wbuf);
1981 	peer->state = PEER_ERR;
1982 }
1983 
1984 void
1985 rde_update_log(const char *message, u_int16_t rid,
1986     const struct rde_peer *peer, const struct bgpd_addr *next,
1987     const struct bgpd_addr *prefix, u_int8_t prefixlen)
1988 {
1989 	char		*l = NULL;
1990 	char		*n = NULL;
1991 	char		*p = NULL;
1992 
1993 	if ( !((conf->log & BGPD_LOG_UPDATES) ||
1994 	       (peer->conf.flags & PEERFLAG_LOG_UPDATES)) )
1995 		return;
1996 
1997 	if (next != NULL)
1998 		if (asprintf(&n, " via %s", log_addr(next)) == -1)
1999 			n = NULL;
2000 	if (asprintf(&p, "%s/%u", log_addr(prefix), prefixlen) == -1)
2001 		p = NULL;
2002 	l = log_fmt_peer(&peer->conf);
2003 	log_info("Rib %s: %s AS%s: %s %s%s", ribs[rid].name,
2004 	    l, log_as(peer->conf.remote_as), message,
2005 	    p ? p : "out of memory", n ? n : "");
2006 
2007 	free(l);
2008 	free(n);
2009 	free(p);
2010 }
2011 
2012 /*
2013  * 4-Byte ASN helper function.
2014  * Two scenarios need to be considered:
2015  * - NEW session with NEW attributes present -> just remove the attributes
2016  * - OLD session with NEW attributes present -> try to merge them
2017  */
2018 void
2019 rde_as4byte_fixup(struct rde_peer *peer, struct rde_aspath *a)
2020 {
2021 	struct attr	*nasp, *naggr, *oaggr;
2022 	u_int32_t	 as;
2023 
2024 	/*
2025 	 * if either ATTR_AS4_AGGREGATOR or ATTR_AS4_PATH is present
2026 	 * try to fixup the attributes.
2027 	 * Do not fixup if F_ATTR_PARSE_ERR is set.
2028 	 */
2029 	if (!(a->flags & F_ATTR_AS4BYTE_NEW) || a->flags & F_ATTR_PARSE_ERR)
2030 		return;
2031 
2032 	/* first get the attributes */
2033 	nasp = attr_optget(a, ATTR_AS4_PATH);
2034 	naggr = attr_optget(a, ATTR_AS4_AGGREGATOR);
2035 
2036 	if (rde_as4byte(peer)) {
2037 		/* NEW session using 4-byte ASNs */
2038 		if (nasp) {
2039 			log_peer_warnx(&peer->conf, "uses 4-byte ASN "
2040 			    "but sent AS4_PATH attribute.");
2041 			attr_free(a, nasp);
2042 		}
2043 		if (naggr) {
2044 			log_peer_warnx(&peer->conf, "uses 4-byte ASN "
2045 			    "but sent AS4_AGGREGATOR attribute.");
2046 			attr_free(a, naggr);
2047 		}
2048 		return;
2049 	}
2050 	/* OLD session using 2-byte ASNs */
2051 	/* try to merge the new attributes into the old ones */
2052 	if ((oaggr = attr_optget(a, ATTR_AGGREGATOR))) {
2053 		memcpy(&as, oaggr->data, sizeof(as));
2054 		if (ntohl(as) != AS_TRANS) {
2055 			/* per RFC ignore AS4_PATH and AS4_AGGREGATOR */
2056 			if (nasp)
2057 				attr_free(a, nasp);
2058 			if (naggr)
2059 				attr_free(a, naggr);
2060 			return;
2061 		}
2062 		if (naggr) {
2063 			/* switch over to new AGGREGATOR */
2064 			attr_free(a, oaggr);
2065 			if (attr_optadd(a, ATTR_OPTIONAL | ATTR_TRANSITIVE,
2066 			    ATTR_AGGREGATOR, naggr->data, naggr->len))
2067 				fatalx("attr_optadd failed but impossible");
2068 		}
2069 	}
2070 	/* there is no need for AS4_AGGREGATOR any more */
2071 	if (naggr)
2072 		attr_free(a, naggr);
2073 
2074 	/* merge AS4_PATH with ASPATH */
2075 	if (nasp)
2076 		aspath_merge(a, nasp);
2077 }
2078 
2079 
2080 /*
2081  * route reflector helper function
2082  */
2083 void
2084 rde_reflector(struct rde_peer *peer, struct rde_aspath *asp)
2085 {
2086 	struct attr	*a;
2087 	u_int8_t	*p;
2088 	u_int16_t	 len;
2089 	u_int32_t	 id;
2090 
2091 	/* do not consider updates with parse errors */
2092 	if (asp->flags & F_ATTR_PARSE_ERR)
2093 		return;
2094 
2095 	/* check for originator id if eq router_id drop */
2096 	if ((a = attr_optget(asp, ATTR_ORIGINATOR_ID)) != NULL) {
2097 		if (memcmp(&conf->bgpid, a->data, sizeof(conf->bgpid)) == 0) {
2098 			/* this is coming from myself */
2099 			asp->flags |= F_ATTR_LOOP;
2100 			return;
2101 		}
2102 	} else if (conf->flags & BGPD_FLAG_REFLECTOR) {
2103 		if (peer->conf.ebgp)
2104 			id = conf->bgpid;
2105 		else
2106 			id = htonl(peer->remote_bgpid);
2107 		if (attr_optadd(asp, ATTR_OPTIONAL, ATTR_ORIGINATOR_ID,
2108 		    &id, sizeof(u_int32_t)) == -1)
2109 			fatalx("attr_optadd failed but impossible");
2110 	}
2111 
2112 	/* check for own id in the cluster list */
2113 	if (conf->flags & BGPD_FLAG_REFLECTOR) {
2114 		if ((a = attr_optget(asp, ATTR_CLUSTER_LIST)) != NULL) {
2115 			for (len = 0; len < a->len;
2116 			    len += sizeof(conf->clusterid))
2117 				/* check if coming from my cluster */
2118 				if (memcmp(&conf->clusterid, a->data + len,
2119 				    sizeof(conf->clusterid)) == 0) {
2120 					asp->flags |= F_ATTR_LOOP;
2121 					return;
2122 				}
2123 
2124 			/* prepend own clusterid by replacing attribute */
2125 			len = a->len + sizeof(conf->clusterid);
2126 			if (len < a->len)
2127 				fatalx("rde_reflector: cluster-list overflow");
2128 			if ((p = malloc(len)) == NULL)
2129 				fatal("rde_reflector");
2130 			memcpy(p, &conf->clusterid, sizeof(conf->clusterid));
2131 			memcpy(p + sizeof(conf->clusterid), a->data, a->len);
2132 			attr_free(asp, a);
2133 			if (attr_optadd(asp, ATTR_OPTIONAL, ATTR_CLUSTER_LIST,
2134 			    p, len) == -1)
2135 				fatalx("attr_optadd failed but impossible");
2136 			free(p);
2137 		} else if (attr_optadd(asp, ATTR_OPTIONAL, ATTR_CLUSTER_LIST,
2138 		    &conf->clusterid, sizeof(conf->clusterid)) == -1)
2139 			fatalx("attr_optadd failed but impossible");
2140 	}
2141 }
2142 
2143 /*
2144  * control specific functions
2145  */
2146 void
2147 rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags)
2148 {
2149 	struct ctl_show_rib	 rib;
2150 	struct ibuf		*wbuf;
2151 	struct attr		*a;
2152 	void			*bp;
2153 	time_t			 staletime;
2154 	u_int8_t		 l;
2155 
2156 	bzero(&rib, sizeof(rib));
2157 	rib.lastchange = p->lastchange;
2158 	rib.local_pref = asp->lpref;
2159 	rib.med = asp->med;
2160 	rib.weight = asp->weight;
2161 	strlcpy(rib.descr, asp->peer->conf.descr, sizeof(rib.descr));
2162 	memcpy(&rib.remote_addr, &asp->peer->remote_addr,
2163 	    sizeof(rib.remote_addr));
2164 	rib.remote_id = asp->peer->remote_bgpid;
2165 	if (asp->nexthop != NULL) {
2166 		memcpy(&rib.true_nexthop, &asp->nexthop->true_nexthop,
2167 		    sizeof(rib.true_nexthop));
2168 		memcpy(&rib.exit_nexthop, &asp->nexthop->exit_nexthop,
2169 		    sizeof(rib.exit_nexthop));
2170 	} else {
2171 		/* announced network may have a NULL nexthop */
2172 		bzero(&rib.true_nexthop, sizeof(rib.true_nexthop));
2173 		bzero(&rib.exit_nexthop, sizeof(rib.exit_nexthop));
2174 		rib.true_nexthop.aid = p->prefix->aid;
2175 		rib.exit_nexthop.aid = p->prefix->aid;
2176 	}
2177 	pt_getaddr(p->prefix, &rib.prefix);
2178 	rib.prefixlen = p->prefix->prefixlen;
2179 	rib.origin = asp->origin;
2180 	rib.flags = 0;
2181 	if (p->rib->active == p)
2182 		rib.flags |= F_PREF_ACTIVE;
2183 	if (!asp->peer->conf.ebgp)
2184 		rib.flags |= F_PREF_INTERNAL;
2185 	if (asp->flags & F_PREFIX_ANNOUNCED)
2186 		rib.flags |= F_PREF_ANNOUNCE;
2187 	if (asp->nexthop == NULL || asp->nexthop->state == NEXTHOP_REACH)
2188 		rib.flags |= F_PREF_ELIGIBLE;
2189 	if (asp->flags & F_ATTR_LOOP)
2190 		rib.flags &= ~F_PREF_ELIGIBLE;
2191 	staletime = asp->peer->staletime[p->prefix->aid];
2192 	if (staletime && p->lastchange <= staletime)
2193 		rib.flags |= F_PREF_STALE;
2194 	rib.aspath_len = aspath_length(asp->aspath);
2195 
2196 	if ((wbuf = imsg_create(ibuf_se_ctl, IMSG_CTL_SHOW_RIB, 0, pid,
2197 	    sizeof(rib) + rib.aspath_len)) == NULL)
2198 		return;
2199 	if (imsg_add(wbuf, &rib, sizeof(rib)) == -1 ||
2200 	    imsg_add(wbuf, aspath_dump(asp->aspath),
2201 	    rib.aspath_len) == -1)
2202 		return;
2203 	imsg_close(ibuf_se_ctl, wbuf);
2204 
2205 	if (flags & F_CTL_DETAIL)
2206 		for (l = 0; l < asp->others_len; l++) {
2207 			if ((a = asp->others[l]) == NULL)
2208 				break;
2209 			if ((wbuf = imsg_create(ibuf_se_ctl,
2210 			    IMSG_CTL_SHOW_RIB_ATTR, 0, pid,
2211 			    attr_optlen(a))) == NULL)
2212 				return;
2213 			if ((bp = ibuf_reserve(wbuf, attr_optlen(a))) == NULL) {
2214 				ibuf_free(wbuf);
2215 				return;
2216 			}
2217 			if (attr_write(bp, attr_optlen(a), a->flags,
2218 			    a->type, a->data, a->len) == -1) {
2219 				ibuf_free(wbuf);
2220 				return;
2221 			}
2222 			imsg_close(ibuf_se_ctl, wbuf);
2223 		}
2224 }
2225 
2226 void
2227 rde_dump_filterout(struct rde_peer *peer, struct prefix *p,
2228     struct ctl_show_rib_request *req)
2229 {
2230 	struct bgpd_addr	 addr;
2231 	struct rde_aspath	*asp;
2232 	enum filter_actions	 a;
2233 
2234 	if (up_test_update(peer, p) != 1)
2235 		return;
2236 
2237 	pt_getaddr(p->prefix, &addr);
2238 	a = rde_filter(out_rules, &asp, peer, p->aspath, &addr,
2239 	    p->prefix->prefixlen, p->aspath->peer);
2240 	if (asp)
2241 		asp->peer = p->aspath->peer;
2242 	else
2243 		asp = p->aspath;
2244 
2245 	if (a == ACTION_ALLOW)
2246 		rde_dump_rib_as(p, asp, req->pid, req->flags);
2247 
2248 	if (asp != p->aspath)
2249 		path_put(asp);
2250 }
2251 
2252 void
2253 rde_dump_filter(struct prefix *p, struct ctl_show_rib_request *req)
2254 {
2255 	struct rde_peer		*peer;
2256 
2257 	if (req->flags & F_CTL_ADJ_IN ||
2258 	    !(req->flags & (F_CTL_ADJ_IN|F_CTL_ADJ_OUT))) {
2259 		if (req->peerid && req->peerid != p->aspath->peer->conf.id)
2260 			return;
2261 		if (req->type == IMSG_CTL_SHOW_RIB_AS &&
2262 		    !aspath_match(p->aspath->aspath->data,
2263 		    p->aspath->aspath->len, &req->as, req->as.as))
2264 			return;
2265 		if (req->type == IMSG_CTL_SHOW_RIB_COMMUNITY &&
2266 		    !community_match(p->aspath, req->community.as,
2267 		    req->community.type))
2268 			return;
2269 		if ((req->flags & F_CTL_ACTIVE) && p->rib->active != p)
2270 			return;
2271 		rde_dump_rib_as(p, p->aspath, req->pid, req->flags);
2272 	} else if (req->flags & F_CTL_ADJ_OUT) {
2273 		if (p->rib->active != p)
2274 			/* only consider active prefix */
2275 			return;
2276 		if (req->peerid) {
2277 			if ((peer = peer_get(req->peerid)) != NULL)
2278 				rde_dump_filterout(peer, p, req);
2279 			return;
2280 		}
2281 	}
2282 }
2283 
2284 void
2285 rde_dump_upcall(struct rib_entry *re, void *ptr)
2286 {
2287 	struct prefix		*p;
2288 	struct rde_dump_ctx	*ctx = ptr;
2289 
2290 	LIST_FOREACH(p, &re->prefix_h, rib_l)
2291 		rde_dump_filter(p, &ctx->req);
2292 }
2293 
2294 void
2295 rde_dump_prefix_upcall(struct rib_entry *re, void *ptr)
2296 {
2297 	struct rde_dump_ctx	*ctx = ptr;
2298 	struct prefix		*p;
2299 	struct pt_entry		*pt;
2300 	struct bgpd_addr	 addr;
2301 
2302 	pt = re->prefix;
2303 	pt_getaddr(pt, &addr);
2304 	if (addr.aid != ctx->req.prefix.aid)
2305 		return;
2306 	if (ctx->req.prefixlen > pt->prefixlen)
2307 		return;
2308 	if (!prefix_compare(&ctx->req.prefix, &addr, ctx->req.prefixlen))
2309 		LIST_FOREACH(p, &re->prefix_h, rib_l)
2310 			rde_dump_filter(p, &ctx->req);
2311 }
2312 
2313 void
2314 rde_dump_ctx_new(struct ctl_show_rib_request *req, pid_t pid,
2315     enum imsg_type type)
2316 {
2317 	struct rde_dump_ctx	*ctx;
2318 	struct rib_entry	*re;
2319 	u_int			 error;
2320 	u_int16_t		 id;
2321 	u_int8_t		 hostplen;
2322 
2323 	if ((ctx = calloc(1, sizeof(*ctx))) == NULL) {
2324 		log_warn("rde_dump_ctx_new");
2325 		error = CTL_RES_NOMEM;
2326 		imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error,
2327 		    sizeof(error));
2328 		return;
2329 	}
2330 	if ((id = rib_find(req->rib)) == RIB_FAILED) {
2331 		log_warnx("rde_dump_ctx_new: no such rib %s", req->rib);
2332 		error = CTL_RES_NOSUCHPEER;
2333 		imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error,
2334 		    sizeof(error));
2335 		free(ctx);
2336 		return;
2337 	}
2338 
2339 	memcpy(&ctx->req, req, sizeof(struct ctl_show_rib_request));
2340 	ctx->req.pid = pid;
2341 	ctx->req.type = type;
2342 	ctx->ribctx.ctx_count = RDE_RUNNER_ROUNDS;
2343 	ctx->ribctx.ctx_rib = &ribs[id];
2344 	switch (ctx->req.type) {
2345 	case IMSG_CTL_SHOW_NETWORK:
2346 		ctx->ribctx.ctx_upcall = network_dump_upcall;
2347 		break;
2348 	case IMSG_CTL_SHOW_RIB:
2349 	case IMSG_CTL_SHOW_RIB_AS:
2350 	case IMSG_CTL_SHOW_RIB_COMMUNITY:
2351 		ctx->ribctx.ctx_upcall = rde_dump_upcall;
2352 		break;
2353 	case IMSG_CTL_SHOW_RIB_PREFIX:
2354 		if (req->flags & F_LONGER) {
2355 			ctx->ribctx.ctx_upcall = rde_dump_prefix_upcall;
2356 			break;
2357 		}
2358 		switch (req->prefix.aid) {
2359 		case AID_INET:
2360 		case AID_VPN_IPv4:
2361 			hostplen = 32;
2362 			break;
2363 		case AID_INET6:
2364 			hostplen = 128;
2365 			break;
2366 		default:
2367 			fatalx("rde_dump_ctx_new: unknown af");
2368 		}
2369 		if (req->prefixlen == hostplen)
2370 			re = rib_lookup(&ribs[id], &req->prefix);
2371 		else
2372 			re = rib_get(&ribs[id], &req->prefix, req->prefixlen);
2373 		if (re)
2374 			rde_dump_upcall(re, ctx);
2375 		rde_dump_done(ctx);
2376 		return;
2377 	default:
2378 		fatalx("rde_dump_ctx_new: unsupported imsg type");
2379 	}
2380 	ctx->ribctx.ctx_done = rde_dump_done;
2381 	ctx->ribctx.ctx_arg = ctx;
2382 	ctx->ribctx.ctx_aid = ctx->req.aid;
2383 	rib_dump_r(&ctx->ribctx);
2384 }
2385 
2386 void
2387 rde_dump_done(void *arg)
2388 {
2389 	struct rde_dump_ctx	*ctx = arg;
2390 
2391 	imsg_compose(ibuf_se_ctl, IMSG_CTL_END, 0, ctx->req.pid,
2392 	    -1, NULL, 0);
2393 	free(ctx);
2394 }
2395 
2396 void
2397 rde_dump_mrt_new(struct mrt *mrt, pid_t pid, int fd)
2398 {
2399 	struct rde_mrt_ctx	*ctx;
2400 	u_int16_t		 id;
2401 
2402 	if ((ctx = calloc(1, sizeof(*ctx))) == NULL) {
2403 		log_warn("rde_dump_mrt_new");
2404 		return;
2405 	}
2406 	memcpy(&ctx->mrt, mrt, sizeof(struct mrt));
2407 	TAILQ_INIT(&ctx->mrt.wbuf.bufs);
2408 	ctx->mrt.wbuf.fd = fd;
2409 	ctx->mrt.state = MRT_STATE_RUNNING;
2410 	id = rib_find(ctx->mrt.rib);
2411 	if (id == RIB_FAILED) {
2412 		log_warnx("non existing RIB %s for mrt dump", ctx->mrt.rib);
2413 		free(ctx);
2414 		return;
2415 	}
2416 
2417 	if (ctx->mrt.type == MRT_TABLE_DUMP_V2)
2418 		mrt_dump_v2_hdr(&ctx->mrt, conf, &peerlist);
2419 
2420 	ctx->ribctx.ctx_count = RDE_RUNNER_ROUNDS;
2421 	ctx->ribctx.ctx_rib = &ribs[id];
2422 	ctx->ribctx.ctx_upcall = mrt_dump_upcall;
2423 	ctx->ribctx.ctx_done = mrt_done;
2424 	ctx->ribctx.ctx_arg = &ctx->mrt;
2425 	ctx->ribctx.ctx_aid = AID_UNSPEC;
2426 	LIST_INSERT_HEAD(&rde_mrts, ctx, entry);
2427 	rde_mrt_cnt++;
2428 	rib_dump_r(&ctx->ribctx);
2429 }
2430 
2431 /*
2432  * kroute specific functions
2433  */
2434 int
2435 rde_rdomain_import(struct rde_aspath *asp, struct rdomain *rd)
2436 {
2437 	struct filter_set	*s;
2438 
2439 	TAILQ_FOREACH(s, &rd->import, entry) {
2440 		if (community_ext_match(asp, &s->action.ext_community, 0))
2441 			return (1);
2442 	}
2443 	return (0);
2444 }
2445 
2446 void
2447 rde_send_kroute(struct prefix *new, struct prefix *old, u_int16_t ribid)
2448 {
2449 	struct kroute_full	 kr;
2450 	struct bgpd_addr	 addr;
2451 	struct prefix		*p;
2452 	struct rdomain		*rd;
2453 	enum imsg_type		 type;
2454 
2455 	/*
2456 	 * Make sure that self announce prefixes are not committed to the
2457 	 * FIB. If both prefixes are unreachable no update is needed.
2458 	 */
2459 	if ((old == NULL || old->aspath->flags & F_PREFIX_ANNOUNCED) &&
2460 	    (new == NULL || new->aspath->flags & F_PREFIX_ANNOUNCED))
2461 		return;
2462 
2463 	if (new == NULL || new->aspath->flags & F_PREFIX_ANNOUNCED) {
2464 		type = IMSG_KROUTE_DELETE;
2465 		p = old;
2466 	} else {
2467 		type = IMSG_KROUTE_CHANGE;
2468 		p = new;
2469 	}
2470 
2471 	pt_getaddr(p->prefix, &addr);
2472 	bzero(&kr, sizeof(kr));
2473 	memcpy(&kr.prefix, &addr, sizeof(kr.prefix));
2474 	kr.prefixlen = p->prefix->prefixlen;
2475 	if (p->aspath->flags & F_NEXTHOP_REJECT)
2476 		kr.flags |= F_REJECT;
2477 	if (p->aspath->flags & F_NEXTHOP_BLACKHOLE)
2478 		kr.flags |= F_BLACKHOLE;
2479 	if (type == IMSG_KROUTE_CHANGE)
2480 		memcpy(&kr.nexthop, &p->aspath->nexthop->true_nexthop,
2481 		    sizeof(kr.nexthop));
2482 	strlcpy(kr.label, rtlabel_id2name(p->aspath->rtlabelid),
2483 	    sizeof(kr.label));
2484 
2485 	switch (addr.aid) {
2486 	case AID_VPN_IPv4:
2487 		if (ribid != 1)
2488 			/* not Loc-RIB, no update for VPNs */
2489 			break;
2490 
2491 		SIMPLEQ_FOREACH(rd, rdomains_l, entry) {
2492 			if (!rde_rdomain_import(p->aspath, rd))
2493 				continue;
2494 			/* must send exit_nexthop so that correct MPLS tunnel
2495 			 * is chosen
2496 			 */
2497 			if (type == IMSG_KROUTE_CHANGE)
2498 				memcpy(&kr.nexthop,
2499 				    &p->aspath->nexthop->exit_nexthop,
2500 				    sizeof(kr.nexthop));
2501 			if (imsg_compose(ibuf_main, type, rd->rtableid, 0, -1,
2502 			    &kr, sizeof(kr)) == -1)
2503 				fatal("%s %d imsg_compose error", __func__,
2504 				    __LINE__);
2505 		}
2506 		break;
2507 	default:
2508 		if (imsg_compose(ibuf_main, type, ribs[ribid].rtableid, 0, -1,
2509 		    &kr, sizeof(kr)) == -1)
2510 			fatal("%s %d imsg_compose error", __func__, __LINE__);
2511 		break;
2512 	}
2513 }
2514 
2515 /*
2516  * pf table specific functions
2517  */
2518 void
2519 rde_send_pftable(u_int16_t id, struct bgpd_addr *addr,
2520     u_int8_t len, int del)
2521 {
2522 	struct pftable_msg pfm;
2523 
2524 	if (id == 0)
2525 		return;
2526 
2527 	/* do not run while cleaning up */
2528 	if (rde_quit)
2529 		return;
2530 
2531 	bzero(&pfm, sizeof(pfm));
2532 	strlcpy(pfm.pftable, pftable_id2name(id), sizeof(pfm.pftable));
2533 	memcpy(&pfm.addr, addr, sizeof(pfm.addr));
2534 	pfm.len = len;
2535 
2536 	if (imsg_compose(ibuf_main,
2537 	    del ? IMSG_PFTABLE_REMOVE : IMSG_PFTABLE_ADD,
2538 	    0, 0, -1, &pfm, sizeof(pfm)) == -1)
2539 		fatal("%s %d imsg_compose error", __func__, __LINE__);
2540 }
2541 
2542 void
2543 rde_send_pftable_commit(void)
2544 {
2545 	/* do not run while cleaning up */
2546 	if (rde_quit)
2547 		return;
2548 
2549 	if (imsg_compose(ibuf_main, IMSG_PFTABLE_COMMIT, 0, 0, -1, NULL, 0) ==
2550 	    -1)
2551 		fatal("%s %d imsg_compose error", __func__, __LINE__);
2552 }
2553 
2554 /*
2555  * nexthop specific functions
2556  */
2557 void
2558 rde_send_nexthop(struct bgpd_addr *next, int valid)
2559 {
2560 	int			 type;
2561 
2562 	if (valid)
2563 		type = IMSG_NEXTHOP_ADD;
2564 	else
2565 		type = IMSG_NEXTHOP_REMOVE;
2566 
2567 	if (imsg_compose(ibuf_main, type, 0, 0, -1, next,
2568 	    sizeof(struct bgpd_addr)) == -1)
2569 		fatal("%s %d imsg_compose error", __func__, __LINE__);
2570 }
2571 
2572 /*
2573  * soft reconfig specific functions
2574  */
2575 void
2576 rde_reload_done(void)
2577 {
2578 	struct rdomain		*rd;
2579 	struct rde_peer		*peer;
2580 	struct filter_head	*fh;
2581 	u_int16_t		 rid;
2582 
2583 	/* first merge the main config */
2584 	if ((nconf->flags & BGPD_FLAG_NO_EVALUATE)
2585 	    != (conf->flags & BGPD_FLAG_NO_EVALUATE)) {
2586 		log_warnx("change to/from route-collector "
2587 		    "mode ignored");
2588 		if (conf->flags & BGPD_FLAG_NO_EVALUATE)
2589 			nconf->flags |= BGPD_FLAG_NO_EVALUATE;
2590 		else
2591 			nconf->flags &= ~BGPD_FLAG_NO_EVALUATE;
2592 	}
2593 	memcpy(conf, nconf, sizeof(struct bgpd_config));
2594 	conf->listen_addrs = NULL;
2595 	conf->csock = NULL;
2596 	conf->rcsock = NULL;
2597 	free(nconf);
2598 	nconf = NULL;
2599 
2600 	/* sync peerself with conf */
2601 	peerself->remote_bgpid = ntohl(conf->bgpid);
2602 	peerself->conf.local_as = conf->as;
2603 	peerself->conf.remote_as = conf->as;
2604 	peerself->short_as = conf->short_as;
2605 
2606 	/* apply new set of rdomain, sync will be done later */
2607 	while ((rd = SIMPLEQ_FIRST(rdomains_l)) != NULL) {
2608 		SIMPLEQ_REMOVE_HEAD(rdomains_l, entry);
2609 		filterset_free(&rd->import);
2610 		filterset_free(&rd->export);
2611 		free(rd);
2612 	}
2613 	free(rdomains_l);
2614 	rdomains_l = newdomains;
2615 	/* XXX WHERE IS THE SYNC ??? */
2616 
2617 	rde_filter_calc_skip_steps(out_rules_tmp);
2618 
2619 	/*
2620 	 * make the new filter rules the active one but keep the old for
2621 	 * softrconfig. This is needed so that changes happening are using
2622 	 * the right filters.
2623 	 */
2624 	fh = out_rules;
2625 	out_rules = out_rules_tmp;
2626 	out_rules_tmp = fh;
2627 
2628 	/* check if filter changed */
2629 	LIST_FOREACH(peer, &peerlist, peer_l) {
2630 		if (peer->conf.id == 0)
2631 			continue;
2632 		peer->reconf_out = 0;
2633 		peer->reconf_rib = 0;
2634 		if (peer->ribid != rib_find(peer->conf.rib)) {
2635 			rib_dump(&ribs[peer->ribid],
2636 			    rde_softreconfig_unload_peer, peer, AID_UNSPEC);
2637 			peer->ribid = rib_find(peer->conf.rib);
2638 			if (peer->ribid == RIB_FAILED)
2639 				fatalx("King Bula's peer met an unknown RIB");
2640 			peer->reconf_rib = 1;
2641 			continue;
2642 		}
2643 		if (peer->conf.softreconfig_out &&
2644 		    !rde_filter_equal(out_rules, out_rules_tmp, peer)) {
2645 			peer->reconf_out = 1;
2646 		}
2647 	}
2648 	/* bring ribs in sync */
2649 	for (rid = 0; rid < rib_size; rid++) {
2650 		if (*ribs[rid].name == '\0')
2651 			continue;
2652 		rde_filter_calc_skip_steps(ribs[rid].in_rules_tmp);
2653 
2654 		/* flip rules, make new active */
2655 		fh = ribs[rid].in_rules;
2656 		ribs[rid].in_rules = ribs[rid].in_rules_tmp;
2657 		ribs[rid].in_rules_tmp = fh;
2658 
2659 		switch (ribs[rid].state) {
2660 		case RECONF_DELETE:
2661 			rib_free(&ribs[rid]);
2662 			break;
2663 		case RECONF_KEEP:
2664 			if (rde_filter_equal(ribs[rid].in_rules,
2665 			    ribs[rid].in_rules_tmp, NULL))
2666 				/* rib is in sync */
2667 				break;
2668 			ribs[rid].state = RECONF_RELOAD;
2669 			/* FALLTHROUGH */
2670 		case RECONF_REINIT:
2671 			rib_dump(&ribs[0], rde_softreconfig_in, &ribs[rid],
2672 			    AID_UNSPEC);
2673 			break;
2674 		case RECONF_RELOAD:
2675 			log_warnx("Bad rib reload state");
2676 			/* FALLTHROUGH */
2677 		case RECONF_NONE:
2678 			break;
2679 		}
2680 	}
2681 	LIST_FOREACH(peer, &peerlist, peer_l) {
2682 		if (peer->reconf_out)
2683 			rib_dump(&ribs[peer->ribid], rde_softreconfig_out,
2684 			    peer, AID_UNSPEC);
2685 		else if (peer->reconf_rib)
2686 			/* dump the full table to neighbors that changed rib */
2687 			peer_dump(peer->conf.id, AID_UNSPEC);
2688 	}
2689 	filterlist_free(out_rules_tmp);
2690 	out_rules_tmp = NULL;
2691 	for (rid = 0; rid < rib_size; rid++) {
2692 		if (*ribs[rid].name == '\0')
2693 			continue;
2694 		filterlist_free(ribs[rid].in_rules_tmp);
2695 		ribs[rid].in_rules_tmp = NULL;
2696 		ribs[rid].state = RECONF_NONE;
2697 	}
2698 
2699 	log_info("RDE reconfigured");
2700 	imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0,
2701 	    -1, NULL, 0);
2702 }
2703 
2704 void
2705 rde_softreconfig_in(struct rib_entry *re, void *ptr)
2706 {
2707 	struct rib		*rib = ptr;
2708 	struct prefix		*p, *np;
2709 	struct pt_entry		*pt;
2710 	struct rde_peer		*peer;
2711 	struct rde_aspath	*asp, *oasp, *nasp;
2712 	enum filter_actions	 oa, na;
2713 	struct bgpd_addr	 addr;
2714 
2715 	pt = re->prefix;
2716 	pt_getaddr(pt, &addr);
2717 	for (p = LIST_FIRST(&re->prefix_h); p != NULL; p = np) {
2718 		/*
2719 		 * prefix_remove() and path_update() may change the object
2720 		 * so cache the values.
2721 		 */
2722 		np = LIST_NEXT(p, rib_l);
2723 		asp = p->aspath;
2724 		peer = asp->peer;
2725 
2726 		/* check if prefix changed */
2727 		if (rib->state == RECONF_RELOAD) {
2728 			oa = rde_filter(rib->in_rules_tmp, &oasp, peer,
2729 			    asp, &addr, pt->prefixlen, peer);
2730 			oasp = oasp != NULL ? oasp : asp;
2731 		} else {
2732 			/* make sure we update everything for RECONF_REINIT */
2733 			oa = ACTION_DENY;
2734 			oasp = asp;
2735 		}
2736 		na = rde_filter(rib->in_rules, &nasp, peer, asp,
2737 		    &addr, pt->prefixlen, peer);
2738 		nasp = nasp != NULL ? nasp : asp;
2739 
2740 		/* go through all 4 possible combinations */
2741 		/* if (oa == ACTION_DENY && na == ACTION_DENY) */
2742 			/* nothing todo */
2743 		if (oa == ACTION_DENY && na == ACTION_ALLOW) {
2744 			/* update Local-RIB */
2745 			path_update(rib, peer, nasp, &addr, pt->prefixlen);
2746 		} else if (oa == ACTION_ALLOW && na == ACTION_DENY) {
2747 			/* remove from Local-RIB */
2748 			prefix_remove(rib, peer, &addr, pt->prefixlen, 0);
2749 		} else if (oa == ACTION_ALLOW && na == ACTION_ALLOW) {
2750 			if (path_compare(nasp, oasp) != 0)
2751 				/* send update */
2752 				path_update(rib, peer, nasp, &addr,
2753 				    pt->prefixlen);
2754 		}
2755 
2756 		if (oasp != asp)
2757 			path_put(oasp);
2758 		if (nasp != asp)
2759 			path_put(nasp);
2760 	}
2761 }
2762 
2763 void
2764 rde_softreconfig_out(struct rib_entry *re, void *ptr)
2765 {
2766 	struct prefix		*p = re->active;
2767 	struct pt_entry		*pt;
2768 	struct rde_peer		*peer = ptr;
2769 	struct rde_aspath	*oasp, *nasp;
2770 	enum filter_actions	 oa, na;
2771 	struct bgpd_addr	 addr;
2772 
2773 	if (peer->conf.id == 0)
2774 		fatalx("King Bula troubled by bad peer");
2775 
2776 	if (p == NULL)
2777 		return;
2778 
2779 	pt = re->prefix;
2780 	pt_getaddr(pt, &addr);
2781 
2782 	if (up_test_update(peer, p) != 1)
2783 		return;
2784 
2785 	oa = rde_filter(out_rules_tmp, &oasp, peer, p->aspath,
2786 	    &addr, pt->prefixlen, p->aspath->peer);
2787 	na = rde_filter(out_rules, &nasp, peer, p->aspath,
2788 	    &addr, pt->prefixlen, p->aspath->peer);
2789 	oasp = oasp != NULL ? oasp : p->aspath;
2790 	nasp = nasp != NULL ? nasp : p->aspath;
2791 
2792 	/* go through all 4 possible combinations */
2793 	/* if (oa == ACTION_DENY && na == ACTION_DENY) */
2794 		/* nothing todo */
2795 	if (oa == ACTION_DENY && na == ACTION_ALLOW) {
2796 		/* send update */
2797 		up_generate(peer, nasp, &addr, pt->prefixlen);
2798 	} else if (oa == ACTION_ALLOW && na == ACTION_DENY) {
2799 		/* send withdraw */
2800 		up_generate(peer, NULL, &addr, pt->prefixlen);
2801 	} else if (oa == ACTION_ALLOW && na == ACTION_ALLOW) {
2802 		/* send update if path attributes changed */
2803 		if (path_compare(nasp, oasp) != 0)
2804 			up_generate(peer, nasp, &addr, pt->prefixlen);
2805 	}
2806 
2807 	if (oasp != p->aspath)
2808 		path_put(oasp);
2809 	if (nasp != p->aspath)
2810 		path_put(nasp);
2811 }
2812 
2813 void
2814 rde_softreconfig_unload_peer(struct rib_entry *re, void *ptr)
2815 {
2816 	struct rde_peer		*peer = ptr;
2817 	struct prefix		*p = re->active;
2818 	struct pt_entry		*pt;
2819 	struct rde_aspath	*oasp;
2820 	enum filter_actions	 oa;
2821 	struct bgpd_addr	 addr;
2822 
2823 	pt = re->prefix;
2824 	pt_getaddr(pt, &addr);
2825 
2826 	/* check if prefix was announced */
2827 	if (up_test_update(peer, p) != 1)
2828 		return;
2829 
2830 	oa = rde_filter(out_rules_tmp, &oasp, peer, p->aspath,
2831 	    &addr, pt->prefixlen, p->aspath->peer);
2832 	oasp = oasp != NULL ? oasp : p->aspath;
2833 
2834 	if (oa == ACTION_DENY)
2835 		/* nothing todo */
2836 		goto done;
2837 
2838 	/* send withdraw */
2839 	up_generate(peer, NULL, &addr, pt->prefixlen);
2840 done:
2841 	if (oasp != p->aspath)
2842 		path_put(oasp);
2843 }
2844 
2845 /*
2846  * update specific functions
2847  */
2848 u_char	queue_buf[4096];
2849 
2850 void
2851 rde_up_dump_upcall(struct rib_entry *re, void *ptr)
2852 {
2853 	struct rde_peer		*peer = ptr;
2854 
2855 	if (re->ribid != peer->ribid)
2856 		fatalx("King Bula: monstrous evil horror.");
2857 	if (re->active == NULL)
2858 		return;
2859 	up_generate_updates(out_rules, peer, re->active, NULL);
2860 }
2861 
2862 void
2863 rde_generate_updates(u_int16_t ribid, struct prefix *new, struct prefix *old)
2864 {
2865 	struct rde_peer			*peer;
2866 
2867 	/*
2868 	 * If old is != NULL we know it was active and should be removed.
2869 	 * If new is != NULL we know it is reachable and then we should
2870 	 * generate an update.
2871 	 */
2872 	if (old == NULL && new == NULL)
2873 		return;
2874 
2875 	LIST_FOREACH(peer, &peerlist, peer_l) {
2876 		if (peer->conf.id == 0)
2877 			continue;
2878 		if (peer->ribid != ribid)
2879 			continue;
2880 		if (peer->state != PEER_UP)
2881 			continue;
2882 		up_generate_updates(out_rules, peer, new, old);
2883 	}
2884 }
2885 
2886 void
2887 rde_update_queue_runner(void)
2888 {
2889 	struct rde_peer		*peer;
2890 	int			 r, sent, max = RDE_RUNNER_ROUNDS, eor = 0;
2891 	u_int16_t		 len, wd_len, wpos;
2892 
2893 	len = sizeof(queue_buf) - MSGSIZE_HEADER;
2894 	do {
2895 		sent = 0;
2896 		LIST_FOREACH(peer, &peerlist, peer_l) {
2897 			if (peer->conf.id == 0)
2898 				continue;
2899 			if (peer->state != PEER_UP)
2900 				continue;
2901 			/* first withdraws */
2902 			wpos = 2; /* reserve space for the length field */
2903 			r = up_dump_prefix(queue_buf + wpos, len - wpos - 2,
2904 			    &peer->withdraws[AID_INET], peer);
2905 			wd_len = r;
2906 			/* write withdraws length filed */
2907 			wd_len = htons(wd_len);
2908 			memcpy(queue_buf, &wd_len, 2);
2909 			wpos += r;
2910 
2911 			/* now bgp path attributes */
2912 			r = up_dump_attrnlri(queue_buf + wpos, len - wpos,
2913 			    peer);
2914 			switch (r) {
2915 			case -1:
2916 				eor = 1;
2917 				if (wd_len == 0) {
2918 					/* no withdraws queued just send EoR */
2919 					peer_send_eor(peer, AID_INET);
2920 					continue;
2921 				}
2922 				break;
2923 			case 2:
2924 				if (wd_len == 0) {
2925 					/*
2926 					 * No packet to send. No withdraws and
2927 					 * no path attributes. Skip.
2928 					 */
2929 					continue;
2930 				}
2931 				/* FALLTHROUGH */
2932 			default:
2933 				wpos += r;
2934 				break;
2935 			}
2936 
2937 			/* finally send message to SE */
2938 			if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
2939 			    0, -1, queue_buf, wpos) == -1)
2940 				fatal("%s %d imsg_compose error", __func__,
2941 				    __LINE__);
2942 			sent++;
2943 			if (eor) {
2944 				eor = 0;
2945 				peer_send_eor(peer, AID_INET);
2946 			}
2947 		}
2948 		max -= sent;
2949 	} while (sent != 0 && max > 0);
2950 }
2951 
2952 void
2953 rde_update6_queue_runner(u_int8_t aid)
2954 {
2955 	struct rde_peer		*peer;
2956 	u_char			*b;
2957 	int			 r, sent, max = RDE_RUNNER_ROUNDS / 2;
2958 	u_int16_t		 len;
2959 
2960 	/* first withdraws ... */
2961 	do {
2962 		sent = 0;
2963 		LIST_FOREACH(peer, &peerlist, peer_l) {
2964 			if (peer->conf.id == 0)
2965 				continue;
2966 			if (peer->state != PEER_UP)
2967 				continue;
2968 			len = sizeof(queue_buf) - MSGSIZE_HEADER;
2969 			b = up_dump_mp_unreach(queue_buf, &len, peer, aid);
2970 
2971 			if (b == NULL)
2972 				continue;
2973 			/* finally send message to SE */
2974 			if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
2975 			    0, -1, b, len) == -1)
2976 				fatal("%s %d imsg_compose error", __func__,
2977 				    __LINE__);
2978 			sent++;
2979 		}
2980 		max -= sent;
2981 	} while (sent != 0 && max > 0);
2982 
2983 	/* ... then updates */
2984 	max = RDE_RUNNER_ROUNDS / 2;
2985 	do {
2986 		sent = 0;
2987 		LIST_FOREACH(peer, &peerlist, peer_l) {
2988 			if (peer->conf.id == 0)
2989 				continue;
2990 			if (peer->state != PEER_UP)
2991 				continue;
2992 			len = sizeof(queue_buf) - MSGSIZE_HEADER;
2993 			r = up_dump_mp_reach(queue_buf, &len, peer, aid);
2994 			switch (r) {
2995 			case -2:
2996 				continue;
2997 			case -1:
2998 				peer_send_eor(peer, aid);
2999 				continue;
3000 			default:
3001 				b = queue_buf + r;
3002 				break;
3003 			}
3004 
3005 			/* finally send message to SE */
3006 			if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
3007 			    0, -1, b, len) == -1)
3008 				fatal("%s %d imsg_compose error", __func__,
3009 				    __LINE__);
3010 			sent++;
3011 		}
3012 		max -= sent;
3013 	} while (sent != 0 && max > 0);
3014 }
3015 
3016 /*
3017  * generic helper function
3018  */
3019 u_int32_t
3020 rde_local_as(void)
3021 {
3022 	return (conf->as);
3023 }
3024 
3025 int
3026 rde_noevaluate(void)
3027 {
3028 	/* do not run while cleaning up */
3029 	if (rde_quit)
3030 		return (1);
3031 
3032 	return (conf->flags & BGPD_FLAG_NO_EVALUATE);
3033 }
3034 
3035 int
3036 rde_decisionflags(void)
3037 {
3038 	return (conf->flags & BGPD_FLAG_DECISION_MASK);
3039 }
3040 
3041 int
3042 rde_as4byte(struct rde_peer *peer)
3043 {
3044 	return (peer->capa.as4byte);
3045 }
3046 
3047 /*
3048  * peer functions
3049  */
3050 struct peer_table {
3051 	struct rde_peer_head	*peer_hashtbl;
3052 	u_int32_t		 peer_hashmask;
3053 } peertable;
3054 
3055 #define PEER_HASH(x)		\
3056 	&peertable.peer_hashtbl[(x) & peertable.peer_hashmask]
3057 
3058 void
3059 peer_init(u_int32_t hashsize)
3060 {
3061 	struct peer_config pc;
3062 	u_int32_t	 hs, i;
3063 
3064 	for (hs = 1; hs < hashsize; hs <<= 1)
3065 		;
3066 	peertable.peer_hashtbl = calloc(hs, sizeof(struct rde_peer_head));
3067 	if (peertable.peer_hashtbl == NULL)
3068 		fatal("peer_init");
3069 
3070 	for (i = 0; i < hs; i++)
3071 		LIST_INIT(&peertable.peer_hashtbl[i]);
3072 	LIST_INIT(&peerlist);
3073 
3074 	peertable.peer_hashmask = hs - 1;
3075 
3076 	bzero(&pc, sizeof(pc));
3077 	snprintf(pc.descr, sizeof(pc.descr), "LOCAL");
3078 
3079 	peerself = peer_add(0, &pc);
3080 	if (peerself == NULL)
3081 		fatalx("peer_init add self");
3082 
3083 	peerself->state = PEER_UP;
3084 }
3085 
3086 void
3087 peer_shutdown(void)
3088 {
3089 	u_int32_t	i;
3090 
3091 	for (i = 0; i <= peertable.peer_hashmask; i++)
3092 		if (!LIST_EMPTY(&peertable.peer_hashtbl[i]))
3093 			log_warnx("peer_free: free non-free table");
3094 
3095 	free(peertable.peer_hashtbl);
3096 }
3097 
3098 struct rde_peer *
3099 peer_get(u_int32_t id)
3100 {
3101 	struct rde_peer_head	*head;
3102 	struct rde_peer		*peer;
3103 
3104 	head = PEER_HASH(id);
3105 
3106 	LIST_FOREACH(peer, head, hash_l) {
3107 		if (peer->conf.id == id)
3108 			return (peer);
3109 	}
3110 	return (NULL);
3111 }
3112 
3113 struct rde_peer *
3114 peer_add(u_int32_t id, struct peer_config *p_conf)
3115 {
3116 	struct rde_peer_head	*head;
3117 	struct rde_peer		*peer;
3118 
3119 	if ((peer = peer_get(id))) {
3120 		memcpy(&peer->conf, p_conf, sizeof(struct peer_config));
3121 		return (NULL);
3122 	}
3123 
3124 	peer = calloc(1, sizeof(struct rde_peer));
3125 	if (peer == NULL)
3126 		fatal("peer_add");
3127 
3128 	LIST_INIT(&peer->path_h);
3129 	memcpy(&peer->conf, p_conf, sizeof(struct peer_config));
3130 	peer->remote_bgpid = 0;
3131 	peer->ribid = rib_find(peer->conf.rib);
3132 	if (peer->ribid == RIB_FAILED)
3133 		fatalx("King Bula's new peer met an unknown RIB");
3134 	peer->state = PEER_NONE;
3135 	up_init(peer);
3136 
3137 	head = PEER_HASH(id);
3138 
3139 	LIST_INSERT_HEAD(head, peer, hash_l);
3140 	LIST_INSERT_HEAD(&peerlist, peer, peer_l);
3141 
3142 	return (peer);
3143 }
3144 
3145 int
3146 peer_localaddrs(struct rde_peer *peer, struct bgpd_addr *laddr)
3147 {
3148 	struct ifaddrs	*ifap, *ifa, *match;
3149 
3150 	if (getifaddrs(&ifap) == -1)
3151 		fatal("getifaddrs");
3152 
3153 	for (match = ifap; match != NULL; match = match->ifa_next)
3154 		if (sa_cmp(laddr, match->ifa_addr) == 0)
3155 			break;
3156 
3157 	if (match == NULL) {
3158 		log_warnx("peer_localaddrs: local address not found");
3159 		return (-1);
3160 	}
3161 
3162 	for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
3163 		if (ifa->ifa_addr->sa_family == AF_INET &&
3164 		    strcmp(ifa->ifa_name, match->ifa_name) == 0) {
3165 			if (ifa->ifa_addr->sa_family ==
3166 			    match->ifa_addr->sa_family)
3167 				ifa = match;
3168 			sa2addr(ifa->ifa_addr, &peer->local_v4_addr);
3169 			break;
3170 		}
3171 	}
3172 	for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
3173 		if (ifa->ifa_addr->sa_family == AF_INET6 &&
3174 		    strcmp(ifa->ifa_name, match->ifa_name) == 0) {
3175 			/*
3176 			 * only accept global scope addresses except explicitly
3177 			 * specified.
3178 			 */
3179 			if (ifa->ifa_addr->sa_family ==
3180 			    match->ifa_addr->sa_family)
3181 				ifa = match;
3182 			else if (IN6_IS_ADDR_LINKLOCAL(
3183 			    &((struct sockaddr_in6 *)ifa->
3184 			    ifa_addr)->sin6_addr) ||
3185 			    IN6_IS_ADDR_SITELOCAL(
3186 			    &((struct sockaddr_in6 *)ifa->
3187 			    ifa_addr)->sin6_addr))
3188 				continue;
3189 			sa2addr(ifa->ifa_addr, &peer->local_v6_addr);
3190 			break;
3191 		}
3192 	}
3193 
3194 	freeifaddrs(ifap);
3195 	return (0);
3196 }
3197 
3198 void
3199 peer_up(u_int32_t id, struct session_up *sup)
3200 {
3201 	struct rde_peer	*peer;
3202 	u_int8_t	 i;
3203 
3204 	peer = peer_get(id);
3205 	if (peer == NULL) {
3206 		log_warnx("peer_up: unknown peer id %d", id);
3207 		return;
3208 	}
3209 
3210 	if (peer->state != PEER_DOWN && peer->state != PEER_NONE &&
3211 	    peer->state != PEER_UP) {
3212 		/*
3213 		 * There is a race condition when doing PEER_ERR -> PEER_DOWN.
3214 		 * So just do a full reset of the peer here.
3215 		 */
3216 		for (i = 0; i < AID_MAX; i++) {
3217 			peer->staletime[i] = 0;
3218 			peer_flush(peer, i);
3219 		}
3220 		up_down(peer);
3221 		peer->prefix_cnt = 0;
3222 		peer->state = PEER_DOWN;
3223 	}
3224 	peer->remote_bgpid = ntohl(sup->remote_bgpid);
3225 	peer->short_as = sup->short_as;
3226 	memcpy(&peer->remote_addr, &sup->remote_addr,
3227 	    sizeof(peer->remote_addr));
3228 	memcpy(&peer->capa, &sup->capa, sizeof(peer->capa));
3229 
3230 	if (peer_localaddrs(peer, &sup->local_addr)) {
3231 		peer->state = PEER_DOWN;
3232 		imsg_compose(ibuf_se, IMSG_SESSION_DOWN, id, 0, -1, NULL, 0);
3233 		return;
3234 	}
3235 
3236 	peer->state = PEER_UP;
3237 	up_init(peer);
3238 
3239 	if (rde_noevaluate())
3240 		/*
3241 		 * no need to dump the table to the peer, there are no active
3242 		 * prefixes anyway. This is a speed up hack.
3243 		 */
3244 		return;
3245 
3246 	for (i = 0; i < AID_MAX; i++) {
3247 		if (peer->capa.mp[i])
3248 			peer_dump(id, i);
3249 	}
3250 }
3251 
3252 void
3253 peer_down(u_int32_t id)
3254 {
3255 	struct rde_peer		*peer;
3256 	struct rde_aspath	*asp, *nasp;
3257 
3258 	peer = peer_get(id);
3259 	if (peer == NULL) {
3260 		log_warnx("peer_down: unknown peer id %d", id);
3261 		return;
3262 	}
3263 	peer->remote_bgpid = 0;
3264 	peer->state = PEER_DOWN;
3265 	up_down(peer);
3266 
3267 	/* walk through per peer RIB list and remove all prefixes. */
3268 	for (asp = LIST_FIRST(&peer->path_h); asp != NULL; asp = nasp) {
3269 		nasp = LIST_NEXT(asp, peer_l);
3270 		path_remove(asp);
3271 	}
3272 	LIST_INIT(&peer->path_h);
3273 	peer->prefix_cnt = 0;
3274 
3275 	/* Deletions are performed in path_remove() */
3276 	rde_send_pftable_commit();
3277 
3278 	LIST_REMOVE(peer, hash_l);
3279 	LIST_REMOVE(peer, peer_l);
3280 	free(peer);
3281 }
3282 
3283 /*
3284  * Flush all routes older then staletime. If staletime is 0 all routes will
3285  * be flushed.
3286  */
3287 void
3288 peer_flush(struct rde_peer *peer, u_int8_t aid)
3289 {
3290 	struct rde_aspath	*asp, *nasp;
3291 	u_int32_t		 rprefixes;
3292 
3293 	rprefixes = 0;
3294 	/* walk through per peer RIB list and remove all stale prefixes. */
3295 	for (asp = LIST_FIRST(&peer->path_h); asp != NULL; asp = nasp) {
3296 		nasp = LIST_NEXT(asp, peer_l);
3297 		rprefixes += path_remove_stale(asp, aid);
3298 	}
3299 
3300 	/* Deletions are performed in path_remove() */
3301 	rde_send_pftable_commit();
3302 
3303 	/* flushed no need to keep staletime */
3304 	peer->staletime[aid] = 0;
3305 
3306 	if (peer->prefix_cnt > rprefixes)
3307 		peer->prefix_cnt -= rprefixes;
3308 	else
3309 		peer->prefix_cnt = 0;
3310 }
3311 
3312 void
3313 peer_stale(u_int32_t id, u_int8_t aid)
3314 {
3315 	struct rde_peer		*peer;
3316 	time_t			 now;
3317 
3318 	peer = peer_get(id);
3319 	if (peer == NULL) {
3320 		log_warnx("peer_stale: unknown peer id %d", id);
3321 		return;
3322 	}
3323 
3324 	/* flush the now even staler routes out */
3325 	if (peer->staletime[aid])
3326 		peer_flush(peer, aid);
3327 	peer->staletime[aid] = now = time(NULL);
3328 
3329 	/* make sure new prefixes start on a higher timestamp */
3330 	do {
3331 		sleep(1);
3332 	} while (now >= time(NULL));
3333 }
3334 
3335 void
3336 peer_dump(u_int32_t id, u_int8_t aid)
3337 {
3338 	struct rde_peer		*peer;
3339 
3340 	peer = peer_get(id);
3341 	if (peer == NULL) {
3342 		log_warnx("peer_dump: unknown peer id %d", id);
3343 		return;
3344 	}
3345 
3346 	if (peer->conf.announce_type == ANNOUNCE_DEFAULT_ROUTE)
3347 		up_generate_default(out_rules, peer, aid);
3348 	else
3349 		rib_dump(&ribs[peer->ribid], rde_up_dump_upcall, peer, aid);
3350 	if (peer->capa.grestart.restart)
3351 		up_generate_marker(peer, aid);
3352 }
3353 
3354 /* End-of-RIB marker, RFC 4724 */
3355 void
3356 peer_recv_eor(struct rde_peer *peer, u_int8_t aid)
3357 {
3358 	peer->prefix_rcvd_eor++;
3359 
3360 	/*
3361 	 * First notify SE to avert a possible race with the restart timeout.
3362 	 * If the timeout fires before this imsg is processed by the SE it will
3363 	 * result in the same operation since the timeout issues a FLUSH which
3364 	 * does the same as the RESTARTED action (flushing stale routes).
3365 	 * The logic in the SE is so that only one of FLUSH or RESTARTED will
3366 	 * be sent back to the RDE and so peer_flush is only called once.
3367 	 */
3368 	if (imsg_compose(ibuf_se, IMSG_SESSION_RESTARTED, peer->conf.id,
3369 	    0, -1, &aid, sizeof(aid)) == -1)
3370 		fatal("%s %d imsg_compose error", __func__, __LINE__);
3371 }
3372 
3373 void
3374 peer_send_eor(struct rde_peer *peer, u_int8_t aid)
3375 {
3376 	u_int16_t	afi;
3377 	u_int8_t	safi;
3378 
3379 	peer->prefix_sent_eor++;
3380 
3381 	if (aid == AID_INET) {
3382 		u_char null[4];
3383 
3384 		bzero(&null, 4);
3385 		if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
3386 		    0, -1, &null, 4) == -1)
3387 			fatal("%s %d imsg_compose error in peer_send_eor",
3388 			    __func__, __LINE__);
3389 	} else {
3390 		u_int16_t	i;
3391 		u_char		buf[10];
3392 
3393 		if (aid2afi(aid, &afi, &safi) == -1)
3394 			fatalx("peer_send_eor: bad AID");
3395 
3396 		i = 0;	/* v4 withdrawn len */
3397 		bcopy(&i, &buf[0], sizeof(i));
3398 		i = htons(6);	/* path attr len */
3399 		bcopy(&i, &buf[2], sizeof(i));
3400 		buf[4] = ATTR_OPTIONAL;
3401 		buf[5] = ATTR_MP_UNREACH_NLRI;
3402 		buf[6] = 3;	/* withdrawn len */
3403 		i = htons(afi);
3404 		bcopy(&i, &buf[7], sizeof(i));
3405 		buf[9] = safi;
3406 
3407 		if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id,
3408 		    0, -1, &buf, 10) == -1)
3409 			fatal("%s %d imsg_compose error in peer_send_eor",
3410 			    __func__, __LINE__);
3411 	}
3412 }
3413 
3414 /*
3415  * network announcement stuff
3416  */
3417 void
3418 network_add(struct network_config *nc, int flagstatic)
3419 {
3420 	struct rdomain		*rd;
3421 	struct rde_aspath	*asp;
3422 	struct filter_set_head	*vpnset = NULL;
3423 	in_addr_t		 prefix4;
3424 	u_int16_t		 i;
3425 
3426 	if (nc->rtableid) {
3427 		SIMPLEQ_FOREACH(rd, rdomains_l, entry) {
3428 			if (rd->rtableid != nc->rtableid)
3429 				continue;
3430 			switch (nc->prefix.aid) {
3431 			case AID_INET:
3432 				prefix4 = nc->prefix.v4.s_addr;
3433 				bzero(&nc->prefix, sizeof(nc->prefix));
3434 				nc->prefix.aid = AID_VPN_IPv4;
3435 				nc->prefix.vpn4.rd = rd->rd;
3436 				nc->prefix.vpn4.addr.s_addr = prefix4;
3437 				nc->prefix.vpn4.labellen = 3;
3438 				nc->prefix.vpn4.labelstack[0] =
3439 				    (rd->label >> 12) & 0xff;
3440 				nc->prefix.vpn4.labelstack[1] =
3441 				    (rd->label >> 4) & 0xff;
3442 				nc->prefix.vpn4.labelstack[2] =
3443 				    (rd->label << 4) & 0xf0;
3444 				nc->prefix.vpn4.labelstack[2] |= BGP_MPLS_BOS;
3445 				vpnset = &rd->export;
3446 				break;
3447 			default:
3448 				log_warnx("unable to VPNize prefix");
3449 				filterset_free(&nc->attrset);
3450 				return;
3451 			}
3452 			break;
3453 		}
3454 		if (rd == NULL) {
3455 			log_warnx("network_add: "
3456 			    "prefix %s/%u in non-existing rdomain %u",
3457 			    log_addr(&nc->prefix), nc->prefixlen, nc->rtableid);
3458 			return;
3459 		}
3460 	}
3461 
3462 	if (nc->type == NETWORK_MRTCLONE) {
3463 		asp = nc->asp;
3464 	} else {
3465 		asp = path_get();
3466 		asp->aspath = aspath_get(NULL, 0);
3467 		asp->origin = ORIGIN_IGP;
3468 		asp->flags = F_ATTR_ORIGIN | F_ATTR_ASPATH |
3469 		    F_ATTR_LOCALPREF | F_PREFIX_ANNOUNCED;
3470 		/* the nexthop is unset unless a default set overrides it */
3471 	}
3472 	if (!flagstatic)
3473 		asp->flags |= F_ANN_DYNAMIC;
3474 	rde_apply_set(asp, &nc->attrset, nc->prefix.aid, peerself, peerself);
3475 	if (vpnset)
3476 		rde_apply_set(asp, vpnset, nc->prefix.aid, peerself, peerself);
3477 	for (i = 1; i < rib_size; i++) {
3478 		if (*ribs[i].name == '\0')
3479 			break;
3480 		path_update(&ribs[i], peerself, asp, &nc->prefix,
3481 		    nc->prefixlen);
3482 	}
3483 	path_put(asp);
3484 	filterset_free(&nc->attrset);
3485 }
3486 
3487 void
3488 network_delete(struct network_config *nc, int flagstatic)
3489 {
3490 	struct rdomain	*rd;
3491 	in_addr_t	 prefix4;
3492 	u_int32_t	 flags = F_PREFIX_ANNOUNCED;
3493 	u_int32_t	 i;
3494 
3495 	if (!flagstatic)
3496 		flags |= F_ANN_DYNAMIC;
3497 
3498 	if (nc->rtableid) {
3499 		SIMPLEQ_FOREACH(rd, rdomains_l, entry) {
3500 			if (rd->rtableid != nc->rtableid)
3501 				continue;
3502 			switch (nc->prefix.aid) {
3503 			case AID_INET:
3504 				prefix4 = nc->prefix.v4.s_addr;
3505 				bzero(&nc->prefix, sizeof(nc->prefix));
3506 				nc->prefix.aid = AID_VPN_IPv4;
3507 				nc->prefix.vpn4.rd = rd->rd;
3508 				nc->prefix.vpn4.addr.s_addr = prefix4;
3509 				nc->prefix.vpn4.labellen = 3;
3510 				nc->prefix.vpn4.labelstack[0] =
3511 				    (rd->label >> 12) & 0xff;
3512 				nc->prefix.vpn4.labelstack[1] =
3513 				    (rd->label >> 4) & 0xff;
3514 				nc->prefix.vpn4.labelstack[2] =
3515 				    (rd->label << 4) & 0xf0;
3516 				nc->prefix.vpn4.labelstack[2] |= BGP_MPLS_BOS;
3517 				break;
3518 			default:
3519 				log_warnx("unable to VPNize prefix");
3520 				return;
3521 			}
3522 		}
3523 	}
3524 
3525 	for (i = rib_size - 1; i > 0; i--) {
3526 		if (*ribs[i].name == '\0')
3527 			break;
3528 		prefix_remove(&ribs[i], peerself, &nc->prefix, nc->prefixlen,
3529 		    flags);
3530 	}
3531 }
3532 
3533 void
3534 network_dump_upcall(struct rib_entry *re, void *ptr)
3535 {
3536 	struct prefix		*p;
3537 	struct kroute_full	 k;
3538 	struct bgpd_addr	 addr;
3539 	struct rde_dump_ctx	*ctx = ptr;
3540 
3541 	LIST_FOREACH(p, &re->prefix_h, rib_l) {
3542 		if (!(p->aspath->flags & F_PREFIX_ANNOUNCED))
3543 			continue;
3544 		pt_getaddr(p->prefix, &addr);
3545 
3546 		bzero(&k, sizeof(k));
3547 		memcpy(&k.prefix, &addr, sizeof(k.prefix));
3548 		if (p->aspath->nexthop == NULL ||
3549 		    p->aspath->nexthop->state != NEXTHOP_REACH)
3550 			k.nexthop.aid = k.prefix.aid;
3551 		else
3552 			memcpy(&k.nexthop, &p->aspath->nexthop->true_nexthop,
3553 			    sizeof(k.nexthop));
3554 		k.prefixlen = p->prefix->prefixlen;
3555 		k.flags = F_KERNEL;
3556 		if ((p->aspath->flags & F_ANN_DYNAMIC) == 0)
3557 			k.flags = F_STATIC;
3558 		if (imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NETWORK, 0,
3559 		    ctx->req.pid, -1, &k, sizeof(k)) == -1)
3560 			log_warnx("network_dump_upcall: "
3561 			    "imsg_compose error");
3562 	}
3563 }
3564 
3565 /* clean up */
3566 void
3567 rde_shutdown(void)
3568 {
3569 	struct rde_peer		*p;
3570 	u_int32_t		 i;
3571 
3572 	/*
3573 	 * the decision process is turned off if rde_quit = 1 and
3574 	 * rde_shutdown depends on this.
3575 	 */
3576 
3577 	/*
3578 	 * All peers go down
3579 	 */
3580 	for (i = 0; i <= peertable.peer_hashmask; i++)
3581 		while ((p = LIST_FIRST(&peertable.peer_hashtbl[i])) != NULL)
3582 			peer_down(p->conf.id);
3583 
3584 	/* free filters */
3585 	filterlist_free(out_rules);
3586 	for (i = 0; i < rib_size; i++) {
3587 		if (*ribs[i].name == '\0')
3588 			break;
3589 		filterlist_free(ribs[i].in_rules);
3590 	}
3591 
3592 	nexthop_shutdown();
3593 	path_shutdown();
3594 	aspath_shutdown();
3595 	attr_shutdown();
3596 	pt_shutdown();
3597 	peer_shutdown();
3598 }
3599 
3600 int
3601 sa_cmp(struct bgpd_addr *a, struct sockaddr *b)
3602 {
3603 	struct sockaddr_in	*in_b;
3604 	struct sockaddr_in6	*in6_b;
3605 
3606 	if (aid2af(a->aid) != b->sa_family)
3607 		return (1);
3608 
3609 	switch (b->sa_family) {
3610 	case AF_INET:
3611 		in_b = (struct sockaddr_in *)b;
3612 		if (a->v4.s_addr != in_b->sin_addr.s_addr)
3613 			return (1);
3614 		break;
3615 	case AF_INET6:
3616 		in6_b = (struct sockaddr_in6 *)b;
3617 #ifdef __KAME__
3618 		/* directly stolen from sbin/ifconfig/ifconfig.c */
3619 		if (IN6_IS_ADDR_LINKLOCAL(&in6_b->sin6_addr)) {
3620 			in6_b->sin6_scope_id =
3621 			    ntohs(*(u_int16_t *)&in6_b->sin6_addr.s6_addr[2]);
3622 			in6_b->sin6_addr.s6_addr[2] =
3623 			    in6_b->sin6_addr.s6_addr[3] = 0;
3624 		}
3625 #endif
3626 		if (bcmp(&a->v6, &in6_b->sin6_addr,
3627 		    sizeof(struct in6_addr)))
3628 			return (1);
3629 		break;
3630 	default:
3631 		fatal("king bula sez: unknown address family");
3632 		/* NOTREACHED */
3633 	}
3634 
3635 	return (0);
3636 }
3637