xref: /openbsd-src/usr.sbin/ospf6d/kroute.c (revision 99fd087599a8791921855f21bd7e36130f39aadc)
1 /*	$OpenBSD: kroute.c,v 1.62 2019/12/16 08:28:33 denis Exp $ */
2 
3 /*
4  * Copyright (c) 2004 Esben Norby <norby@openbsd.org>
5  * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include <sys/types.h>
21 #include <sys/socket.h>
22 #include <sys/sysctl.h>
23 #include <sys/tree.h>
24 #include <sys/uio.h>
25 #include <netinet/in.h>
26 #include <arpa/inet.h>
27 #include <net/if.h>
28 #include <net/if_dl.h>
29 #include <net/if_types.h>
30 #include <net/route.h>
31 #include <err.h>
32 #include <errno.h>
33 #include <fcntl.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include <limits.h>
39 
40 #include "ospf6d.h"
41 #include "ospfe.h"
42 #include "log.h"
43 
/*
 * State of the kernel routing interface, shared by the functions in
 * this file.
 */
struct {
	/* sequence number stamped on outgoing routing messages */
	u_int32_t		rtseq;
	/* process id, recorded by kr_init() */
	pid_t			pid;
	/* non-zero while coupled; send_rtmsg() sends nothing when it is 0 */
	int			fib_sync;
	/* priority written into the routing messages we send */
	u_int8_t		fib_prio;
	/* routing socket opened by kr_init() */
	int			fd;
	/* read event on fd, dispatched to kr_dispatch_msg() */
	struct event		ev;
	/* value copied into rtm_tableid of outgoing routing messages */
	u_int			rdomain;
} kr_state;
53 
/*
 * One route in the kroute tree.  Routes that compare equal under
 * kroute_compare() share a single tree slot: the first one is linked
 * into the tree and the others are chained behind it through next
 * (see kroute_insert()).
 */
struct kroute_node {
	RB_ENTRY(kroute_node)	 entry;	/* red-black tree linkage */
	struct kroute_node	*next;	/* next path of this route or NULL */
	struct kroute		 r;	/* the route data */
};
59 
/* redistribution and FIB helpers */
void	kr_redist_remove(struct kroute_node *, struct kroute_node *);
int	kr_redist_eval(struct kroute *, struct kroute *);
void	kr_redistribute(struct kroute_node *);
int	kroute_compare(struct kroute_node *, struct kroute_node *);
int	kr_change_fib(struct kroute_node *, struct kroute *, int, int);
int	kr_delete_fib(struct kroute_node *);

/* kroute tree management */
struct kroute_node	*kroute_find(const struct in6_addr *, u_int8_t,
			    u_int8_t);
struct kroute_node	*kroute_matchgw(struct kroute_node *,
			    struct in6_addr *, unsigned int);
int			 kroute_insert(struct kroute_node *);
int			 kroute_remove(struct kroute_node *);
void			 kroute_clear(void);

/* interface bookkeeping */
struct iface		*kif_update(u_short, int, struct if_data *,
			   struct sockaddr_dl *);
int			 kif_validate(u_short);

/* longest prefix match over the kroute tree */
struct kroute_node	*kroute_match(struct in6_addr *);

/* misc helpers and interface/address change handlers */
int		protect_lo(void);
void		get_rtaddrs(int, struct sockaddr *, struct sockaddr **);
void		if_change(u_short, int, struct if_data *, struct sockaddr_dl *);
void		if_newaddr(u_short, struct sockaddr_in6 *,
		    struct sockaddr_in6 *, struct sockaddr_in6 *);
void		if_deladdr(u_short, struct sockaddr_in6 *,
		    struct sockaddr_in6 *, struct sockaddr_in6 *);
void		if_announce(void *);

/* routing socket */
int		send_rtmsg(int, int, struct kroute *);
int		dispatch_rtmsg(void);
int		fetchtable(void);
int		rtmsg_process(char *, size_t);

/* the kroute tree itself, ordered by kroute_compare() */
RB_HEAD(kroute_tree, kroute_node)	krt;
RB_PROTOTYPE(kroute_tree, kroute_node, entry, kroute_compare)
RB_GENERATE(kroute_tree, kroute_node, entry, kroute_compare)
98 
99 int
100 kr_init(int fs, u_int rdomain, int redis_label_or_prefix, u_int8_t fib_prio)
101 {
102 	int		opt = 0, rcvbuf, default_rcvbuf;
103 	socklen_t	optlen;
104 	int		filter_prio = fib_prio;
105 
106 	kr_state.fib_sync = fs;
107 	kr_state.rdomain = rdomain;
108 	kr_state.fib_prio = fib_prio;
109 
110 	if ((kr_state.fd = socket(AF_ROUTE,
111 	    SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK, AF_INET6)) == -1) {
112 		log_warn("kr_init: socket");
113 		return (-1);
114 	}
115 
116 	/* not interested in my own messages */
117 	if (setsockopt(kr_state.fd, SOL_SOCKET, SO_USELOOPBACK,
118 	    &opt, sizeof(opt)) == -1)
119 		log_warn("kr_init: setsockopt");	/* not fatal */
120 
121 	if (redis_label_or_prefix) {
122 		filter_prio = 0;
123 		log_info("%s: priority filter disabled", __func__);
124 	} else
125 		log_debug("%s: priority filter enabled", __func__);
126 
127 	if (setsockopt(kr_state.fd, AF_ROUTE, ROUTE_PRIOFILTER, &filter_prio,
128 	    sizeof(filter_prio)) == -1) {
129 		log_warn("%s: setsockopt AF_ROUTE ROUTE_PRIOFILTER", __func__);
130 		/* not fatal */
131 	}
132 
133 	/* grow receive buffer, don't wanna miss messages */
134 	optlen = sizeof(default_rcvbuf);
135 	if (getsockopt(kr_state.fd, SOL_SOCKET, SO_RCVBUF,
136 	    &default_rcvbuf, &optlen) == -1)
137 		log_warn("kr_init getsockopt SOL_SOCKET SO_RCVBUF");
138 	else
139 		for (rcvbuf = MAX_RTSOCK_BUF;
140 		    rcvbuf > default_rcvbuf &&
141 		    setsockopt(kr_state.fd, SOL_SOCKET, SO_RCVBUF,
142 		    &rcvbuf, sizeof(rcvbuf)) == -1 && errno == ENOBUFS;
143 		    rcvbuf /= 2)
144 			;	/* nothing */
145 
146 	kr_state.pid = getpid();
147 	kr_state.rtseq = 1;
148 
149 	RB_INIT(&krt);
150 
151 	if (fetchtable() == -1)
152 		return (-1);
153 
154 	if (protect_lo() == -1)
155 		return (-1);
156 
157 	event_set(&kr_state.ev, kr_state.fd, EV_READ | EV_PERSIST,
158 	    kr_dispatch_msg, NULL);
159 	event_add(&kr_state.ev, NULL);
160 
161 	return (0);
162 }
163 
164 int
165 kr_change_fib(struct kroute_node *kr, struct kroute *kroute, int krcount,
166     int action)
167 {
168 	int			 i;
169 	struct kroute_node	*kn, *nkn;
170 
171 	if (action == RTM_ADD) {
172 		/*
173 		 * First remove all stale multipath routes.
174 		 * This step must be skipped when the action is RTM_CHANGE
175 		 * because it is already a single path route that will be
176 		 * changed.
177 		 */
178 		for (kn = kr; kn != NULL; kn = nkn) {
179 			for (i = 0; i < krcount; i++) {
180 				if (kn->r.scope == kroute[i].scope &&
181 				    IN6_ARE_ADDR_EQUAL(&kn->r.nexthop,
182 				    &kroute[i].nexthop))
183 					break;
184 			}
185 			nkn = kn->next;
186 			if (i == krcount) {
187 				/* stale route */
188 				if (kr_delete_fib(kn) == -1)
189 					log_warnx("kr_delete_fib failed");
190 				/*
191 				 * if head element was removed we need to adjust
192 				 * the head
193 				 */
194 				if (kr == kn)
195 					kr = nkn;
196 			}
197 		}
198 	}
199 
200 	/*
201 	 * now add or change the route
202 	 */
203 	for (i = 0; i < krcount; i++) {
204 		/* nexthop ::1 -> ignore silently */
205 		if (IN6_IS_ADDR_LOOPBACK(&kroute[i].nexthop))
206 			continue;
207 
208 		if (action == RTM_ADD && kr) {
209 			for (kn = kr; kn != NULL; kn = kn->next) {
210 				if (kn->r.scope == kroute[i].scope &&
211 				    IN6_ARE_ADDR_EQUAL(&kn->r.nexthop,
212 				    &kroute[i].nexthop))
213 					break;
214 			}
215 
216 			if (kn != NULL)
217 				/* nexthop already present, skip it */
218 				continue;
219 		} else
220 			/* modify first entry */
221 			kn = kr;
222 
223 		/* send update */
224 		if (send_rtmsg(kr_state.fd, action, &kroute[i]) == -1)
225 			return (-1);
226 
227 		/* create new entry unless we are changing the first entry */
228 		if (action == RTM_ADD)
229 			if ((kn = calloc(1, sizeof(*kn))) == NULL)
230 				fatal(NULL);
231 
232 		kn->r.prefix = kroute[i].prefix;
233 		kn->r.prefixlen = kroute[i].prefixlen;
234 		kn->r.nexthop = kroute[i].nexthop;
235 		kn->r.scope = kroute[i].scope;
236 		kn->r.flags = kroute[i].flags | F_OSPFD_INSERTED;
237 		kn->r.priority = kr_state.fib_prio;
238 		kn->r.ext_tag = kroute[i].ext_tag;
239 		rtlabel_unref(kn->r.rtlabel);	/* for RTM_CHANGE */
240 		kn->r.rtlabel = kroute[i].rtlabel;
241 
242 		if (action == RTM_ADD)
243 			if (kroute_insert(kn) == -1) {
244 				log_debug("kr_update_fib: cannot insert %s",
245 				    log_in6addr(&kn->r.nexthop));
246 				free(kn);
247 			}
248 		action = RTM_ADD;
249 	}
250 	return  (0);
251 }
252 
253 int
254 kr_change(struct kroute *kroute, int krcount)
255 {
256 	struct kroute_node	*kr;
257 	int			 action = RTM_ADD;
258 
259 	kroute->rtlabel = rtlabel_tag2id(kroute->ext_tag);
260 
261 	kr = kroute_find(&kroute->prefix, kroute->prefixlen, kr_state.fib_prio);
262 	if (kr != NULL && kr->next == NULL && krcount == 1) {
263 		/*
264 		 * single path OSPF route.
265 		 * The kernel does not allow to change a gateway route to a
266 		 * cloning route or contrary. In this case remove and add the
267 		 * route, otherwise change the existing one.
268 		 */
269 		if ((IN6_IS_ADDR_UNSPECIFIED(&kroute->nexthop) &&
270 		    !IN6_IS_ADDR_UNSPECIFIED(&kr->r.nexthop)) ||
271 		    (!IN6_IS_ADDR_UNSPECIFIED(&kroute->nexthop) &&
272 		    IN6_IS_ADDR_UNSPECIFIED(&kr->r.nexthop))) {
273 			if (kr_delete_fib(kr) == 0)
274 				kr = NULL;
275 			else {
276 				log_warn("kr_change: failed to remove route: "
277 				    "%s/%d", log_in6addr(&kr->r.prefix),
278 				    kr->r.prefixlen);
279 				return (-1);
280 			}
281 		} else
282 			action = RTM_CHANGE;
283 	}
284 
285 	return (kr_change_fib(kr, kroute, krcount, action));
286 }
287 
288 int
289 kr_delete_fib(struct kroute_node *kr)
290 {
291 	if (kr->r.priority != kr_state.fib_prio)
292 		log_warn("kr_delete_fib: %s/%d has wrong priority %d",
293 		    log_in6addr(&kr->r.prefix), kr->r.prefixlen,
294 		    kr->r.priority);
295 
296 	if (send_rtmsg(kr_state.fd, RTM_DELETE, &kr->r) == -1)
297 		return (-1);
298 
299 	if (kroute_remove(kr) == -1)
300 		return (-1);
301 
302 	return (0);
303 }
304 
305 int
306 kr_delete(struct kroute *kroute)
307 {
308 	struct kroute_node	*kr, *nkr;
309 
310 	if ((kr = kroute_find(&kroute->prefix, kroute->prefixlen,
311 	    kr_state.fib_prio)) == NULL)
312 		return (0);
313 
314 	while (kr != NULL) {
315 		nkr = kr->next;
316 		if (kr_delete_fib(kr) == -1)
317 			return (-1);
318 		kr = nkr;
319 	}
320 
321 	return (0);
322 }
323 
/*
 * Shut the kernel routing interface down: withdraw our routes from the
 * kernel (kr_fib_decouple()) and then empty the kroute tree.
 */
void
kr_shutdown(void)
{
	kr_fib_decouple();
	kroute_clear();
}
330 
331 void
332 kr_fib_couple(void)
333 {
334 	struct kroute_node	*kr;
335 	struct kroute_node	*kn;
336 
337 	if (kr_state.fib_sync == 1)	/* already coupled */
338 		return;
339 
340 	kr_state.fib_sync = 1;
341 
342 	RB_FOREACH(kr, kroute_tree, &krt)
343 		if (kr->r.priority == kr_state.fib_prio)
344 			for (kn = kr; kn != NULL; kn = kn->next)
345 				send_rtmsg(kr_state.fd, RTM_ADD, &kn->r);
346 
347 	log_info("kernel routing table coupled");
348 }
349 
350 void
351 kr_fib_decouple(void)
352 {
353 	struct kroute_node	*kr;
354 	struct kroute_node	*kn;
355 
356 	if (kr_state.fib_sync == 0)	/* already decoupled */
357 		return;
358 
359 	RB_FOREACH(kr, kroute_tree, &krt)
360 		if (kr->r.priority == kr_state.fib_prio)
361 			for (kn = kr; kn != NULL; kn = kn->next)
362 				send_rtmsg(kr_state.fd, RTM_DELETE, &kn->r);
363 
364 	kr_state.fib_sync = 0;
365 
366 	log_info("kernel routing table decoupled");
367 }
368 
369 void
370 kr_fib_update_prio(u_int8_t fib_prio)
371 {
372 	struct kroute_node	*kr;
373 
374 	RB_FOREACH(kr, kroute_tree, &krt)
375 		if ((kr->r.flags & F_OSPFD_INSERTED))
376 			kr->r.priority = fib_prio;
377 
378 	log_info("fib priority changed from %hhu to %hhu", kr_state.fib_prio,
379 	    fib_prio);
380 
381 	kr_state.fib_prio = fib_prio;
382 }
383 
/*
 * Event callback for the routing socket (registered in kr_init()).
 * None of the arguments are used; the work is done by dispatch_rtmsg(),
 * whose return value is ignored here.
 */
/* ARGSUSED */
void
kr_dispatch_msg(int fd, short event, void *bula)
{
	/* XXX this is stupid */
	dispatch_rtmsg();
}
391 
392 void
393 kr_show_route(struct imsg *imsg)
394 {
395 	struct kroute_node	*kr;
396 	struct kroute_node	*kn;
397 	int			 flags;
398 	struct in6_addr		 addr;
399 
400 	switch (imsg->hdr.type) {
401 	case IMSG_CTL_KROUTE:
402 		if (imsg->hdr.len != IMSG_HEADER_SIZE + sizeof(flags)) {
403 			log_warnx("kr_show_route: wrong imsg len");
404 			return;
405 		}
406 		memcpy(&flags, imsg->data, sizeof(flags));
407 		RB_FOREACH(kr, kroute_tree, &krt)
408 			if (!flags || kr->r.flags & flags) {
409 				kn = kr;
410 				do {
411 					main_imsg_compose_ospfe(IMSG_CTL_KROUTE,
412 					    imsg->hdr.pid,
413 					    &kn->r, sizeof(kn->r));
414 				} while ((kn = kn->next) != NULL);
415 			}
416 		break;
417 	case IMSG_CTL_KROUTE_ADDR:
418 		if (imsg->hdr.len != IMSG_HEADER_SIZE +
419 		    sizeof(struct in6_addr)) {
420 			log_warnx("kr_show_route: wrong imsg len");
421 			return;
422 		}
423 		memcpy(&addr, imsg->data, sizeof(addr));
424 		kr = kroute_match(&addr);
425 		if (kr != NULL)
426 			main_imsg_compose_ospfe(IMSG_CTL_KROUTE, imsg->hdr.pid,
427 			    &kr->r, sizeof(kr->r));
428 		break;
429 	default:
430 		log_debug("kr_show_route: error handling imsg");
431 		break;
432 	}
433 
434 	main_imsg_compose_ospfe(IMSG_CTL_END, imsg->hdr.pid, NULL, 0);
435 }
436 
437 void
438 kr_redist_remove(struct kroute_node *kh, struct kroute_node *kn)
439 {
440 	struct kroute	 *kr;
441 
442 	/* was the route redistributed? */
443 	if ((kn->r.flags & F_REDISTRIBUTED) == 0)
444 		return;
445 
446 	/* remove redistributed flag */
447 	kn->r.flags &= ~F_REDISTRIBUTED;
448 	kr = &kn->r;
449 
450 	/* probably inform the RDE (check if no other path is redistributed) */
451 	for (kn = kh; kn; kn = kn->next)
452 		if (kn->r.flags & F_REDISTRIBUTED)
453 			break;
454 
455 	if (kn == NULL)
456 		main_imsg_compose_rde(IMSG_NETWORK_DEL, 0, kr,
457 		    sizeof(struct kroute));
458 }
459 
460 int
461 kr_redist_eval(struct kroute *kr, struct kroute *new_kr)
462 {
463 	u_int32_t	 metric = 0;
464 
465 	/* Only non-ospfd routes are considered for redistribution. */
466 	if (!(kr->flags & F_KERNEL))
467 		goto dont_redistribute;
468 
469 	/* Dynamic routes are not redistributable. */
470 	if (kr->flags & F_DYNAMIC)
471 		goto dont_redistribute;
472 
473 	/* interface is not up and running so don't announce */
474 	if (kr->flags & F_DOWN)
475 		goto dont_redistribute;
476 
477 	/*
478 	 * We consider loopback, multicast, link- and site-local,
479 	 * IPv4 mapped and IPv4 compatible addresses as not redistributable.
480 	 */
481 	if (IN6_IS_ADDR_LOOPBACK(&kr->prefix) ||
482 	    IN6_IS_ADDR_MULTICAST(&kr->prefix) ||
483 	    IN6_IS_ADDR_LINKLOCAL(&kr->prefix) ||
484 	    IN6_IS_ADDR_SITELOCAL(&kr->prefix) ||
485 	    IN6_IS_ADDR_V4MAPPED(&kr->prefix) ||
486 	    IN6_IS_ADDR_V4COMPAT(&kr->prefix))
487 		goto dont_redistribute;
488 	/*
489 	 * Consider networks with nexthop loopback as not redistributable
490 	 * unless it is a reject or blackhole route.
491 	 */
492 	if (IN6_IS_ADDR_LOOPBACK(&kr->nexthop) &&
493 	    !(kr->flags & (F_BLACKHOLE|F_REJECT)))
494 		goto dont_redistribute;
495 
496 	/* Should we redistribute this route? */
497 	if (!ospf_redistribute(kr, &metric))
498 		goto dont_redistribute;
499 
500 	/* prefix should be redistributed */
501 	kr->flags |= F_REDISTRIBUTED;
502 	/*
503 	 * only one of all multipath routes can be redistributed so
504 	 * redistribute the best one.
505 	 */
506 	if (new_kr->metric > metric) {
507 		*new_kr = *kr;
508 		new_kr->metric = metric;
509 	}
510 
511 	return (1);
512 
513 dont_redistribute:
514 	/* was the route redistributed? */
515 	if ((kr->flags & F_REDISTRIBUTED) == 0)
516 		return (0);
517 
518 	kr->flags &= ~F_REDISTRIBUTED;
519 	return (1);
520 }
521 
522 void
523 kr_redistribute(struct kroute_node *kh)
524 {
525 	struct kroute_node	*kn;
526 	struct kroute		 kr;
527 	int			 redistribute = 0;
528 
529 	/* only the highest prio route can be redistributed */
530 	if (kroute_find(&kh->r.prefix, kh->r.prefixlen, RTP_ANY) != kh)
531 		return;
532 
533 	bzero(&kr, sizeof(kr));
534 	kr.metric = UINT_MAX;
535 	for (kn = kh; kn; kn = kn->next)
536 		if (kr_redist_eval(&kn->r, &kr))
537 			redistribute = 1;
538 
539 	if (!redistribute)
540 		return;
541 
542 	if (kr.flags & F_REDISTRIBUTED) {
543 		main_imsg_compose_rde(IMSG_NETWORK_ADD, 0, &kr,
544 		    sizeof(struct kroute));
545 	} else {
546 		kr = kh->r;
547 		main_imsg_compose_rde(IMSG_NETWORK_DEL, 0, &kr,
548 		    sizeof(struct kroute));
549 	}
550 }
551 
552 void
553 kr_reload(int redis_label_or_prefix)
554 {
555 	struct kroute_node	*kr, *kn;
556 	u_int32_t		 dummy;
557 	int			 r;
558 	int			 filter_prio = kr_state.fib_prio;
559 
560 	/* update the priority filter */
561 	if (redis_label_or_prefix) {
562 		filter_prio = 0;
563 		log_info("%s: priority filter disabled", __func__);
564 	} else
565 		log_debug("%s: priority filter enabled", __func__);
566 
567 	if (setsockopt(kr_state.fd, AF_ROUTE, ROUTE_PRIOFILTER, &filter_prio,
568 	    sizeof(filter_prio)) == -1) {
569 		log_warn("%s: setsockopt AF_ROUTE ROUTE_PRIOFILTER", __func__);
570 		/* not fatal */
571 	}
572 
573 	RB_FOREACH(kr, kroute_tree, &krt) {
574 		for (kn = kr; kn; kn = kn->next) {
575 			r = ospf_redistribute(&kn->r, &dummy);
576 			/*
577 			 * if it is redistributed, redistribute again metric
578 			 * may have changed.
579 			 */
580 			if ((kn->r.flags & F_REDISTRIBUTED && !r) || r)
581 				break;
582 		}
583 		if (kn) {
584 			/*
585 			 * kr_redistribute copes with removes and RDE with
586 			 * duplicates
587 			 */
588 			kr_redistribute(kr);
589 		}
590 	}
591 }
592 
593 /* rb-tree compare */
594 int
595 kroute_compare(struct kroute_node *a, struct kroute_node *b)
596 {
597 	int	i;
598 
599 	/* XXX maybe switch a & b */
600 	i = memcmp(&a->r.prefix, &b->r.prefix, sizeof(a->r.prefix));
601 	if (i)
602 		return (i);
603 	if (a->r.prefixlen < b->r.prefixlen)
604 		return (-1);
605 	if (a->r.prefixlen > b->r.prefixlen)
606 		return (1);
607 
608 	/* if the priority is RTP_ANY finish on the first address hit */
609 	if (a->r.priority == RTP_ANY || b->r.priority == RTP_ANY)
610 		return (0);
611 	if (a->r.priority < b->r.priority)
612 		return (-1);
613 	if (a->r.priority > b->r.priority)
614 		return (1);
615 	return (0);
616 }
617 
618 /* tree management */
619 struct kroute_node *
620 kroute_find(const struct in6_addr *prefix, u_int8_t prefixlen, u_int8_t prio)
621 {
622 	struct kroute_node	s;
623 	struct kroute_node	*kn, *tmp;
624 
625 	s.r.prefix = *prefix;
626 	s.r.prefixlen = prefixlen;
627 	s.r.priority = prio;
628 
629 	kn = RB_FIND(kroute_tree, &krt, &s);
630 	if (kn && prio == RTP_ANY) {
631 		tmp = RB_PREV(kroute_tree, &krt, kn);
632 		while (tmp) {
633 			if (kroute_compare(&s, tmp) == 0)
634 				kn = tmp;
635 			else
636 				break;
637 			tmp = RB_PREV(kroute_tree, &krt, kn);
638 		}
639 	}
640 	return (kn);
641 }
642 
643 struct kroute_node *
644 kroute_matchgw(struct kroute_node *kr, struct in6_addr *nh, unsigned int scope)
645 {
646 	while (kr) {
647 		if (scope == kr->r.scope &&
648 		    IN6_ARE_ADDR_EQUAL(&kr->r.nexthop, nh))
649 			return (kr);
650 		kr = kr->next;
651 	}
652 
653 	return (NULL);
654 }
655 
656 int
657 kroute_insert(struct kroute_node *kr)
658 {
659 	struct kroute_node	*krm, *krh;
660 
661 	if ((krh = RB_INSERT(kroute_tree, &krt, kr)) != NULL) {
662 		/*
663 		 * Multipath route, add at end of list.
664 		 */
665 		krm = krh;
666 		while (krm->next != NULL)
667 			krm = krm->next;
668 		krm->next = kr;
669 		kr->next = NULL; /* to be sure */
670 	} else
671 		krh = kr;
672 
673 	if (!(kr->r.flags & F_KERNEL)) {
674 		/* don't validate or redistribute ospf route */
675 		kr->r.flags &= ~F_DOWN;
676 		return (0);
677 	}
678 
679 	if (kif_validate(kr->r.ifindex))
680 		kr->r.flags &= ~F_DOWN;
681 	else
682 		kr->r.flags |= F_DOWN;
683 
684 	kr_redistribute(krh);
685 	return (0);
686 }
687 
688 int
689 kroute_remove(struct kroute_node *kr)
690 {
691 	struct kroute_node	*krm;
692 
693 	if ((krm = RB_FIND(kroute_tree, &krt, kr)) == NULL) {
694 		log_warnx("kroute_remove failed to find %s/%u",
695 		    log_in6addr(&kr->r.prefix), kr->r.prefixlen);
696 		return (-1);
697 	}
698 
699 	if (krm == kr) {
700 		/* head element */
701 		if (RB_REMOVE(kroute_tree, &krt, kr) == NULL) {
702 			log_warnx("kroute_remove failed for %s/%u",
703 			    log_in6addr(&kr->r.prefix), kr->r.prefixlen);
704 			return (-1);
705 		}
706 		if (kr->next != NULL) {
707 			if (RB_INSERT(kroute_tree, &krt, kr->next) != NULL) {
708 				log_warnx("kroute_remove failed to add %s/%u",
709 				    log_in6addr(&kr->r.prefix),
710 				    kr->r.prefixlen);
711 				return (-1);
712 			}
713 		}
714 	} else {
715 		/* somewhere in the list */
716 		while (krm->next != kr && krm->next != NULL)
717 			krm = krm->next;
718 		if (krm->next == NULL) {
719 			log_warnx("kroute_remove multipath list corrupted "
720 			    "for %s/%u", log_in6addr(&kr->r.prefix),
721 			    kr->r.prefixlen);
722 			return (-1);
723 		}
724 		krm->next = kr->next;
725 	}
726 
727 	kr_redist_remove(krm, kr);
728 	rtlabel_unref(kr->r.rtlabel);
729 
730 	free(kr);
731 	return (0);
732 }
733 
734 void
735 kroute_clear(void)
736 {
737 	struct kroute_node	*kr;
738 
739 	while ((kr = RB_MIN(kroute_tree, &krt)) != NULL)
740 		kroute_remove(kr);
741 }
742 
743 struct iface *
744 kif_update(u_short ifindex, int flags, struct if_data *ifd,
745     struct sockaddr_dl *sdl)
746 {
747 	struct iface	*iface;
748 	char		 ifname[IF_NAMESIZE];
749 
750 	if ((iface = if_find(ifindex)) == NULL) {
751 		bzero(ifname, sizeof(ifname));
752 		if (sdl && sdl->sdl_family == AF_LINK) {
753 			if (sdl->sdl_nlen >= sizeof(ifname))
754 				memcpy(ifname, sdl->sdl_data,
755 				    sizeof(ifname) - 1);
756 			else if (sdl->sdl_nlen > 0)
757 				memcpy(ifname, sdl->sdl_data, sdl->sdl_nlen);
758 			else
759 				return (NULL);
760 		} else
761 			return (NULL);
762 		if ((iface = if_new(ifindex, ifname)) == NULL)
763 			return (NULL);
764 		iface->cflags |= F_IFACE_AVAIL;
765 	}
766 
767 	if_update(iface, ifd->ifi_mtu, flags, ifd->ifi_type,
768 	    ifd->ifi_link_state, ifd->ifi_baudrate, ifd->ifi_rdomain);
769 
770 	return (iface);
771 }
772 
773 int
774 kif_validate(u_short ifindex)
775 {
776 	struct iface	*iface;
777 
778 	if ((iface = if_find(ifindex)) == NULL) {
779 		log_warnx("interface with index %u not found", ifindex);
780 		return (-1);
781 	}
782 
783 	return ((iface->flags & IFF_UP) && LINK_STATE_IS_UP(iface->linkstate));
784 }
785 
786 struct kroute_node *
787 kroute_match(struct in6_addr *key)
788 {
789 	int			 i;
790 	struct kroute_node	*kr;
791 	struct in6_addr		 ina;
792 
793 	/* we will never match the default route */
794 	for (i = 128; i > 0; i--) {
795 		inet6applymask(&ina, key, i);
796 		if ((kr = kroute_find(&ina, i, RTP_ANY)) != NULL)
797 			return (kr);
798 	}
799 
800 	/* if we don't have a match yet, try to find a default route */
801 	if ((kr = kroute_find(&in6addr_any, 0, RTP_ANY)) != NULL)
802 			return (kr);
803 
804 	return (NULL);
805 }
806 
807 /* misc */
808 int
809 protect_lo(void)
810 {
811 	struct kroute_node	*kr;
812 
813 	/* special protection for loopback */
814 	if ((kr = calloc(1, sizeof(struct kroute_node))) == NULL) {
815 		log_warn("protect_lo");
816 		return (-1);
817 	}
818 	memcpy(&kr->r.prefix, &in6addr_loopback, sizeof(kr->r.prefix));
819 	kr->r.prefixlen = 128;
820 	kr->r.flags = F_KERNEL|F_CONNECTED;
821 
822 	if (RB_INSERT(kroute_tree, &krt, kr) != NULL)
823 		free(kr);	/* kernel route already there, no problem */
824 
825 	return (0);
826 }
827 
/*
 * Round a up to the next multiple of sizeof(long); a value of zero (or
 * less) yields sizeof(long).  The argument is evaluated more than once.
 */
#define ROUNDUP(a) \
	((a) <= 0 ? sizeof(long) : ((((a) - 1) | (sizeof(long) - 1)) + 1))
830 
831 void
832 get_rtaddrs(int addrs, struct sockaddr *sa, struct sockaddr **rti_info)
833 {
834 	int	i;
835 
836 	for (i = 0; i < RTAX_MAX; i++) {
837 		if (addrs & (1 << i)) {
838 			rti_info[i] = sa;
839 			sa = (struct sockaddr *)((char *)(sa) +
840 			    ROUNDUP(sa->sa_len));
841 		} else
842 			rti_info[i] = NULL;
843 	}
844 }
845 
846 void
847 if_change(u_short ifindex, int flags, struct if_data *ifd,
848     struct sockaddr_dl *sdl)
849 {
850 	struct kroute_node	*kr, *tkr;
851 	struct iface		*iface;
852 	u_int8_t		 wasvalid, isvalid;
853 
854 	wasvalid = kif_validate(ifindex);
855 
856 	if ((iface = kif_update(ifindex, flags, ifd, sdl)) == NULL) {
857 		log_warn("if_change: kif_update(%u)", ifindex);
858 		return;
859 	}
860 
861 	/* inform engine and rde about state change */
862 	main_imsg_compose_rde(IMSG_IFINFO, 0, iface, sizeof(struct iface));
863 	main_imsg_compose_ospfe(IMSG_IFINFO, 0, iface, sizeof(struct iface));
864 
865 	isvalid = (iface->flags & IFF_UP) &&
866 	    LINK_STATE_IS_UP(iface->linkstate);
867 
868 	if (wasvalid == isvalid)
869 		return;		/* nothing changed wrt validity */
870 
871 	/* update redistribute list */
872 	RB_FOREACH(kr, kroute_tree, &krt) {
873 		for (tkr = kr; tkr != NULL; tkr = tkr->next) {
874 			if (tkr->r.ifindex == ifindex) {
875 				if (isvalid)
876 					tkr->r.flags &= ~F_DOWN;
877 				else
878 					tkr->r.flags |= F_DOWN;
879 
880 			}
881 		}
882 		kr_redistribute(kr);
883 	}
884 }
885 
886 void
887 if_newaddr(u_short ifindex, struct sockaddr_in6 *ifa, struct sockaddr_in6 *mask,
888     struct sockaddr_in6 *brd)
889 {
890 	struct iface		*iface;
891 	struct iface_addr	*ia;
892 	struct ifaddrchange	 ifc;
893 
894 	if (ifa == NULL || ifa->sin6_family != AF_INET6)
895 		return;
896 	if ((iface = if_find(ifindex)) == NULL) {
897 		log_warnx("if_newaddr: corresponding if %d not found", ifindex);
898 		return;
899 	}
900 
901 	/* We only care about link-local and global-scope. */
902 	if (IN6_IS_ADDR_UNSPECIFIED(&ifa->sin6_addr) ||
903 	    IN6_IS_ADDR_LOOPBACK(&ifa->sin6_addr) ||
904 	    IN6_IS_ADDR_MULTICAST(&ifa->sin6_addr) ||
905 	    IN6_IS_ADDR_SITELOCAL(&ifa->sin6_addr) ||
906 	    IN6_IS_ADDR_V4MAPPED(&ifa->sin6_addr) ||
907 	    IN6_IS_ADDR_V4COMPAT(&ifa->sin6_addr))
908 		return;
909 
910 	clearscope(&ifa->sin6_addr);
911 
912 	if (IN6_IS_ADDR_LINKLOCAL(&ifa->sin6_addr) ||
913 	    iface->flags & IFF_LOOPBACK)
914 		iface->addr = ifa->sin6_addr;
915 
916 	if ((ia = calloc(1, sizeof(struct iface_addr))) == NULL)
917 		fatal("if_newaddr");
918 
919 	ia->addr = ifa->sin6_addr;
920 
921 	if (mask)
922 		ia->prefixlen = mask2prefixlen(mask);
923 	else
924 		ia->prefixlen = 0;
925 	if (brd && brd->sin6_family == AF_INET6)
926 		ia->dstbrd = brd->sin6_addr;
927 	else
928 		bzero(&ia->dstbrd, sizeof(ia->dstbrd));
929 
930 	switch (iface->type) {
931 	case IF_TYPE_BROADCAST:
932 	case IF_TYPE_NBMA:
933 		log_debug("if_newaddr: ifindex %u, addr %s/%d",
934 		    ifindex, log_in6addr(&ia->addr), ia->prefixlen);
935 		break;
936 	case IF_TYPE_VIRTUALLINK:	/* FIXME */
937 		break;
938 	case IF_TYPE_POINTOPOINT:
939 	case IF_TYPE_POINTOMULTIPOINT:
940 		log_debug("if_newaddr: ifindex %u, addr %s/%d, "
941 		    "dest %s", ifindex, log_in6addr(&ia->addr),
942 		    ia->prefixlen, log_in6addr(&ia->dstbrd));
943 		break;
944 	default:
945 		fatalx("if_newaddr: unknown interface type");
946 	}
947 
948 	TAILQ_INSERT_TAIL(&iface->ifa_list, ia, entry);
949 	/* inform engine and rde if interface is used */
950 	if (iface->cflags & F_IFACE_CONFIGURED) {
951 		ifc.addr = ia->addr;
952 		ifc.dstbrd = ia->dstbrd;
953 		ifc.prefixlen = ia->prefixlen;
954 		ifc.ifindex = ifindex;
955 		main_imsg_compose_ospfe(IMSG_IFADDRNEW, 0, &ifc, sizeof(ifc));
956 		main_imsg_compose_rde(IMSG_IFADDRNEW, 0, &ifc, sizeof(ifc));
957 	}
958 }
959 
960 void
961 if_deladdr(u_short ifindex, struct sockaddr_in6 *ifa, struct sockaddr_in6 *mask,
962     struct sockaddr_in6 *brd)
963 {
964 	struct iface		*iface;
965 	struct iface_addr	*ia, *nia;
966 	struct ifaddrchange	 ifc;
967 
968 	if (ifa == NULL || ifa->sin6_family != AF_INET6)
969 		return;
970 	if ((iface = if_find(ifindex)) == NULL) {
971 		log_warnx("if_deladdr: corresponding if %d not found", ifindex);
972 		return;
973 	}
974 
975 	/* We only care about link-local and global-scope. */
976 	if (IN6_IS_ADDR_UNSPECIFIED(&ifa->sin6_addr) ||
977 	    IN6_IS_ADDR_LOOPBACK(&ifa->sin6_addr) ||
978 	    IN6_IS_ADDR_MULTICAST(&ifa->sin6_addr) ||
979 	    IN6_IS_ADDR_SITELOCAL(&ifa->sin6_addr) ||
980 	    IN6_IS_ADDR_V4MAPPED(&ifa->sin6_addr) ||
981 	    IN6_IS_ADDR_V4COMPAT(&ifa->sin6_addr))
982 		return;
983 
984 	clearscope(&ifa->sin6_addr);
985 
986 	for (ia = TAILQ_FIRST(&iface->ifa_list); ia != NULL; ia = nia) {
987 		nia = TAILQ_NEXT(ia, entry);
988 
989 		if (IN6_ARE_ADDR_EQUAL(&ia->addr, &ifa->sin6_addr)) {
990 			log_debug("if_deladdr: ifindex %u, addr %s/%d",
991 			    ifindex, log_in6addr(&ia->addr), ia->prefixlen);
992 			TAILQ_REMOVE(&iface->ifa_list, ia, entry);
993 			/* inform engine and rde if interface is used */
994 			if (iface->cflags & F_IFACE_CONFIGURED) {
995 				ifc.addr = ia->addr;
996 				ifc.dstbrd = ia->dstbrd;
997 				ifc.prefixlen = ia->prefixlen;
998 				ifc.ifindex = ifindex;
999 				main_imsg_compose_ospfe(IMSG_IFADDRDEL, 0, &ifc,
1000 				    sizeof(ifc));
1001 				main_imsg_compose_rde(IMSG_IFADDRDEL, 0, &ifc,
1002 				    sizeof(ifc));
1003 			}
1004 			free(ia);
1005 			return;
1006 		}
1007 	}
1008 }
1009 
1010 void
1011 if_announce(void *msg)
1012 {
1013 	struct if_announcemsghdr	*ifan;
1014 	struct iface			*iface;
1015 
1016 	ifan = msg;
1017 
1018 	switch (ifan->ifan_what) {
1019 	case IFAN_ARRIVAL:
1020 		if ((iface = if_new(ifan->ifan_index, ifan->ifan_name)) == NULL)
1021 			fatal("if_announce failed");
1022 		iface->cflags |= F_IFACE_AVAIL;
1023 		break;
1024 	case IFAN_DEPARTURE:
1025 		iface = if_find(ifan->ifan_index);
1026 		if (iface->cflags & F_IFACE_CONFIGURED) {
1027 			main_imsg_compose_rde(IMSG_IFDELETE, 0,
1028 			    &iface->ifindex, sizeof(iface->ifindex));
1029 			main_imsg_compose_ospfe(IMSG_IFDELETE, 0,
1030 			    &iface->ifindex, sizeof(iface->ifindex));
1031 		}
1032 		if_del(iface);
1033 		break;
1034 	}
1035 }
1036 
1037 /* rtsock */
1038 int
1039 send_rtmsg(int fd, int action, struct kroute *kroute)
1040 {
1041 	struct iovec		iov[5];
1042 	struct rt_msghdr	hdr;
1043 	struct pad {
1044 		struct sockaddr_in6	addr;
1045 		char			pad[sizeof(long)]; /* thank you IPv6 */
1046 	} prefix, nexthop, mask;
1047 	struct {
1048 		struct sockaddr_dl	addr;
1049 		char			pad[sizeof(long)];
1050 	} ifp;
1051 	struct sockaddr_rtlabel	sa_rl;
1052 	int			iovcnt = 0;
1053 	const char		*label;
1054 
1055 	if (kr_state.fib_sync == 0)
1056 		return (0);
1057 
1058 	/* initialize header */
1059 	bzero(&hdr, sizeof(hdr));
1060 	hdr.rtm_version = RTM_VERSION;
1061 	hdr.rtm_type = action;
1062 	hdr.rtm_priority = kr_state.fib_prio;
1063 	hdr.rtm_tableid = kr_state.rdomain;	/* rtableid */
1064 	if (action == RTM_CHANGE)
1065 		hdr.rtm_fmask = RTF_REJECT|RTF_BLACKHOLE;
1066 	else
1067 		hdr.rtm_flags = RTF_MPATH;
1068 	hdr.rtm_seq = kr_state.rtseq++;	/* overflow doesn't matter */
1069 	hdr.rtm_hdrlen = sizeof(hdr);
1070 	hdr.rtm_msglen = sizeof(hdr);
1071 	/* adjust iovec */
1072 	iov[iovcnt].iov_base = &hdr;
1073 	iov[iovcnt++].iov_len = sizeof(hdr);
1074 
1075 	bzero(&prefix, sizeof(prefix));
1076 	prefix.addr.sin6_len = sizeof(struct sockaddr_in6);
1077 	prefix.addr.sin6_family = AF_INET6;
1078 	prefix.addr.sin6_addr = kroute->prefix;
1079 	/* adjust header */
1080 	hdr.rtm_addrs |= RTA_DST;
1081 	hdr.rtm_msglen += ROUNDUP(sizeof(struct sockaddr_in6));
1082 	/* adjust iovec */
1083 	iov[iovcnt].iov_base = &prefix;
1084 	iov[iovcnt++].iov_len = ROUNDUP(sizeof(struct sockaddr_in6));
1085 
1086 	if (!IN6_IS_ADDR_UNSPECIFIED(&kroute->nexthop)) {
1087 		bzero(&nexthop, sizeof(nexthop));
1088 		nexthop.addr.sin6_len = sizeof(struct sockaddr_in6);
1089 		nexthop.addr.sin6_family = AF_INET6;
1090 		nexthop.addr.sin6_addr = kroute->nexthop;
1091 		nexthop.addr.sin6_scope_id = kroute->scope;
1092 		/*
1093 		 * XXX we should set the sin6_scope_id but the kernel
1094 		 * XXX does not expect it that way. It must be fiddled
1095 		 * XXX into the sin6_addr. Welcome to the typical
1096 		 * XXX IPv6 insanity and all without wine bottles.
1097 		 */
1098 		embedscope(&nexthop.addr);
1099 
1100 		/* adjust header */
1101 		hdr.rtm_flags |= RTF_GATEWAY;
1102 		hdr.rtm_addrs |= RTA_GATEWAY;
1103 		hdr.rtm_msglen += ROUNDUP(sizeof(struct sockaddr_in6));
1104 		/* adjust iovec */
1105 		iov[iovcnt].iov_base = &nexthop;
1106 		iov[iovcnt++].iov_len = ROUNDUP(sizeof(struct sockaddr_in6));
1107 	} else if (kroute->ifindex) {
1108 		/*
1109 		 * We don't have an interface address in that network,
1110 		 * so we install a cloning route.  The kernel will then
1111 		 * do neighbor discovery.
1112 		 */
1113 		bzero(&ifp, sizeof(ifp));
1114 		ifp.addr.sdl_len = sizeof(struct sockaddr_dl);
1115 		ifp.addr.sdl_family = AF_LINK;
1116 
1117 		ifp.addr.sdl_index  = kroute->ifindex;
1118 		/* adjust header */
1119 		hdr.rtm_flags |= RTF_CLONING;
1120 		hdr.rtm_addrs |= RTA_GATEWAY;
1121 		hdr.rtm_msglen += ROUNDUP(sizeof(struct sockaddr_dl));
1122 		/* adjust iovec */
1123 		iov[iovcnt].iov_base = &ifp;
1124 		iov[iovcnt++].iov_len = ROUNDUP(sizeof(struct sockaddr_dl));
1125 	}
1126 
1127 	bzero(&mask, sizeof(mask));
1128 	mask.addr.sin6_len = sizeof(struct sockaddr_in6);
1129 	mask.addr.sin6_family = AF_INET6;
1130 	mask.addr.sin6_addr = *prefixlen2mask(kroute->prefixlen);
1131 	/* adjust header */
1132 	if (kroute->prefixlen == 128)
1133 		hdr.rtm_flags |= RTF_HOST;
1134 	hdr.rtm_addrs |= RTA_NETMASK;
1135 	hdr.rtm_msglen += ROUNDUP(sizeof(struct sockaddr_in6));
1136 	/* adjust iovec */
1137 	iov[iovcnt].iov_base = &mask;
1138 	iov[iovcnt++].iov_len = ROUNDUP(sizeof(struct sockaddr_in6));
1139 
1140 	if (kroute->rtlabel != 0) {
1141 		sa_rl.sr_len = sizeof(sa_rl);
1142 		sa_rl.sr_family = AF_UNSPEC;
1143 		label = rtlabel_id2name(kroute->rtlabel);
1144 		if (strlcpy(sa_rl.sr_label, label,
1145 		    sizeof(sa_rl.sr_label)) >= sizeof(sa_rl.sr_label)) {
1146 			log_warnx("send_rtmsg: invalid rtlabel");
1147 			return (-1);
1148 		}
1149 		/* adjust header */
1150 		hdr.rtm_addrs |= RTA_LABEL;
1151 		hdr.rtm_msglen += sizeof(sa_rl);
1152 		/* adjust iovec */
1153 		iov[iovcnt].iov_base = &sa_rl;
1154 		iov[iovcnt++].iov_len = sizeof(sa_rl);
1155 	}
1156 
1157 retry:
1158 	if (writev(fd, iov, iovcnt) == -1) {
1159 		if (errno == ESRCH) {
1160 			if (hdr.rtm_type == RTM_CHANGE) {
1161 				hdr.rtm_type = RTM_ADD;
1162 				goto retry;
1163 			} else if (hdr.rtm_type == RTM_DELETE) {
1164 				log_info("route %s/%u vanished before delete",
1165 				    log_sockaddr(&prefix), kroute->prefixlen);
1166 				return (0);
1167 			}
1168 		}
1169 		log_warn("send_rtmsg: action %u, prefix %s/%u", hdr.rtm_type,
1170 		    log_sockaddr(&prefix), kroute->prefixlen);
1171 		return (0);
1172 	}
1173 
1174 	return (0);
1175 }
1176 
/*
 * Load the kernel routing table into the kroute state.
 *
 * Issues the NET_RT_DUMP sysctl for AF_INET6 routes of routing table
 * kr_state.rdomain, once with a NULL buffer to learn the required size
 * and once to copy the dump, then passes the dump to rtmsg_process().
 *
 * Returns the result of rtmsg_process(), or -1 if a sysctl call or the
 * buffer allocation fails.
 */
int
fetchtable(void)
{
	size_t			 len;
	int			 mib[7];
	char			*buf;
	int			 rv;
	int			 retries = 3;	/* bound for the ENOMEM retry */

	mib[0] = CTL_NET;
	mib[1] = PF_ROUTE;
	mib[2] = 0;
	mib[3] = AF_INET6;
	mib[4] = NET_RT_DUMP;
	mib[5] = 0;
	mib[6] = kr_state.rdomain;	/* rtableid */

	for (;;) {
		/* ask for the size needed to hold the dump */
		if (sysctl(mib, 7, NULL, &len, NULL, 0) == -1) {
			log_warn("sysctl");
			return (-1);
		}
		if ((buf = malloc(len)) == NULL) {
			log_warn("fetchtable");
			return (-1);
		}
		if (sysctl(mib, 7, buf, &len, NULL, 0) != -1)
			break;

		/*
		 * The table may have grown since the size query, in which
		 * case the buffer is too short and the dump fails with
		 * ENOMEM.  Size it again a limited number of times; any
		 * other error, or running out of retries, is a failure.
		 */
		if (errno != ENOMEM || retries-- == 0) {
			log_warn("sysctl");
			free(buf);
			return (-1);
		}
		free(buf);
	}

	rv = rtmsg_process(buf, len);
	free(buf);

	return (rv);
}
1212 
/*
 * Load interface information from the kernel.
 *
 * Issues the NET_RT_IFLIST sysctl (address family AF_INET6) with ifindex
 * as the last mib element, once with a NULL buffer to learn the required
 * size and once to copy the data, then passes the data to
 * rtmsg_process().
 * NOTE(review): an ifindex of 0 presumably selects all interfaces --
 * confirm against sysctl(2).
 *
 * Returns the result of rtmsg_process(), or -1 if a sysctl call or the
 * buffer allocation fails.
 */
int
fetchifs(u_short ifindex)
{
	size_t			 len;
	int			 mib[6];
	char			*buf;
	int			 rv;
	int			 retries = 3;	/* bound for the ENOMEM retry */

	mib[0] = CTL_NET;
	mib[1] = PF_ROUTE;
	mib[2] = 0;
	mib[3] = AF_INET6;
	mib[4] = NET_RT_IFLIST;
	mib[5] = ifindex;

	for (;;) {
		/* ask for the size needed to hold the list */
		if (sysctl(mib, 6, NULL, &len, NULL, 0) == -1) {
			log_warn("sysctl");
			return (-1);
		}
		if ((buf = malloc(len)) == NULL) {
			log_warn("fetchifs");
			return (-1);
		}
		if (sysctl(mib, 6, buf, &len, NULL, 0) != -1)
			break;

		/*
		 * The list may have grown since the size query, in which
		 * case the buffer is too short and the call fails with
		 * ENOMEM.  Size it again a limited number of times; any
		 * other error, or running out of retries, is a failure.
		 */
		if (errno != ENOMEM || retries-- == 0) {
			log_warn("sysctl");
			free(buf);
			return (-1);
		}
		free(buf);
	}

	rv = rtmsg_process(buf, len);
	free(buf);

	return (rv);
}
1247 
1248 int
1249 dispatch_rtmsg(void)
1250 {
1251 	char			 buf[RT_BUF_SIZE];
1252 	ssize_t			 n;
1253 
1254 	if ((n = read(kr_state.fd, &buf, sizeof(buf))) == -1) {
1255 		if (errno == EAGAIN || errno == EINTR)
1256 			return (0);
1257 		log_warn("dispatch_rtmsg: read error");
1258 		return (-1);
1259 	}
1260 
1261 	if (n == 0) {
1262 		log_warnx("routing socket closed");
1263 		return (-1);
1264 	}
1265 
1266 	return (rtmsg_process(buf, n));
1267 }
1268 
/*
 * Process a buffer holding one or more routing messages.
 *
 * buf/len describe the buffer; messages are laid out back to back and
 * each is rtm_msglen bytes long.  Messages whose version is not
 * RTM_VERSION are skipped.  Route messages (RTM_ADD, RTM_GET, RTM_CHANGE,
 * RTM_DELETE) are decoded and applied to the kroute table; interface and
 * address messages are forwarded to if_change(), if_newaddr(),
 * if_deladdr() and if_announce().  All other types are ignored.
 *
 * A message that is truncated or claims a length of zero is treated as
 * fatal (fatalx).
 *
 * Returns -1 if an allocation, a kernel route deletion, a multipath
 * lookup on delete, or kroute_remove() fails; otherwise the offset
 * reached after the last message, converted to int.
 */
int
rtmsg_process(char *buf, size_t len)
{
	struct rt_msghdr	*rtm;
	struct if_msghdr	 ifm;
	struct ifa_msghdr	*ifam;
	struct sockaddr		*sa, *rti_info[RTAX_MAX];
	struct sockaddr_in6	*sa_in6;
	struct sockaddr_rtlabel	*label;
	struct kroute_node	*kr, *okr;
	struct in6_addr		 prefix, nexthop;
	u_int8_t		 prefixlen, prio;
	int			 flags, mpath;
	unsigned int		 scope;
	u_short			 ifindex = 0;
	int			 rv;
	size_t			 offset;
	char			*next;

	for (offset = 0; offset < len; offset += rtm->rtm_msglen) {
		next = buf + offset;
		rtm = (struct rt_msghdr *)next;
		if (len < offset + sizeof(u_short) ||
		    len < offset + rtm->rtm_msglen)
			fatalx("rtmsg_process: partial rtm in buffer");
		/* a zero length would never advance offset: loop forever */
		if (rtm->rtm_msglen == 0)
			fatalx("rtmsg_process: zero-length rtm in buffer");
		if (rtm->rtm_version != RTM_VERSION)
			continue;

		/* reset the per-message route description */
		bzero(&prefix, sizeof(prefix));
		bzero(&nexthop, sizeof(nexthop));
		scope = 0;
		prefixlen = 0;
		flags = F_KERNEL;
		mpath = 0;
		prio = 0;

		/* the sockaddrs follow the header; index them by RTAX_* */
		sa = (struct sockaddr *)(next + rtm->rtm_hdrlen);
		get_rtaddrs(rtm->rtm_addrs, sa, rti_info);

		/*
		 * First pass: for route messages, extract prefix, prefixlen,
		 * nexthop, scope, flags, priority and ifindex.  Other
		 * message types are handled in the second switch only.
		 */
		switch (rtm->rtm_type) {
		case RTM_ADD:
		case RTM_GET:
		case RTM_CHANGE:
		case RTM_DELETE:
			if (rtm->rtm_errno)		/* failed attempts... */
				continue;

			if (rtm->rtm_tableid != kr_state.rdomain)
				continue;

			/* only look at RTM_GET messages carrying our pid */
			if (rtm->rtm_type == RTM_GET &&
			    rtm->rtm_pid != kr_state.pid)
				continue;

			if ((sa = rti_info[RTAX_DST]) == NULL)
				continue;

			/* Skip ARP/ND cache and broadcast routes. */
			if (rtm->rtm_flags & (RTF_LLINFO|RTF_BROADCAST))
				continue;

			if (rtm->rtm_flags & RTF_MPATH)
				mpath = 1;
			prio = rtm->rtm_priority;
			/* routes with our FIB priority count as ours */
			flags = (prio == kr_state.fib_prio) ?
			    F_OSPFD_INSERTED : F_KERNEL;

			switch (sa->sa_family) {
			case AF_INET6:
				prefix =
				    ((struct sockaddr_in6 *)sa)->sin6_addr;
				sa_in6 = (struct sockaddr_in6 *)
				    rti_info[RTAX_NETMASK];
				/*
				 * A netmask with sin6_len 0 leaves prefixlen
				 * at 0; without a netmask only host routes
				 * are acceptable.
				 */
				if (sa_in6 != NULL) {
					if (sa_in6->sin6_len != 0)
						prefixlen = mask2prefixlen(
						    sa_in6);
				} else if (rtm->rtm_flags & RTF_HOST)
					prefixlen = 128;
				else
					fatalx("classful IPv6 address?!!");
				if (rtm->rtm_flags & RTF_STATIC)
					flags |= F_STATIC;
				if (rtm->rtm_flags & RTF_BLACKHOLE)
					flags |= F_BLACKHOLE;
				if (rtm->rtm_flags & RTF_REJECT)
					flags |= F_REJECT;
				if (rtm->rtm_flags & RTF_DYNAMIC)
					flags |= F_DYNAMIC;
				break;
			default:
				/* not an IPv6 destination */
				continue;
			}

			ifindex = rtm->rtm_index;
			if ((sa = rti_info[RTAX_GATEWAY]) != NULL) {
				switch (sa->sa_family) {
				case AF_INET6:
					if (rtm->rtm_flags & RTF_CONNECTED)
						flags |= F_CONNECTED;

					sa_in6 = (struct sockaddr_in6 *)sa;
					/*
					 * XXX The kernel provides the scope
					 * XXX via the kame hack instead of
					 * XXX the scope_id field.
					 */
					recoverscope(sa_in6);
					nexthop = sa_in6->sin6_addr;
					scope = sa_in6->sin6_scope_id;
					break;
				case AF_LINK:
					/* link-level gateway: connected */
					flags |= F_CONNECTED;
					break;
				}
			}
		}

		/* Second pass: apply the message. */
		switch (rtm->rtm_type) {
		case RTM_ADD:
		case RTM_GET:
		case RTM_CHANGE:
			if (IN6_IS_ADDR_UNSPECIFIED(&nexthop) &&
			    !(flags & F_CONNECTED)) {
				log_warnx("rtmsg_process no nexthop for %s/%u",
				    log_in6addr(&prefix), prefixlen);
				continue;
			}

			if ((okr = kroute_find(&prefix, prefixlen, prio))
			    != NULL) {
				/* just add new multipath routes */
				if (mpath && rtm->rtm_type == RTM_ADD)
					goto add;
				/* get the correct route */
				kr = okr;
				if (mpath && (kr = kroute_matchgw(okr,
				    &nexthop, scope)) == NULL) {
					log_warnx("rtmsg_process: mpath route"
					    " not found");
					/* add routes we missed out earlier */
					goto add;
				}

				/* update in place, keeping F_REDISTRIBUTED */
				if (kr->r.flags & F_REDISTRIBUTED)
					flags |= F_REDISTRIBUTED;
				kr->r.nexthop = nexthop;
				kr->r.scope = scope;
				kr->r.flags = flags;
				kr->r.ifindex = ifindex;

				/* replace the label with the one received */
				rtlabel_unref(kr->r.rtlabel);
				kr->r.rtlabel = 0;
				kr->r.ext_tag = 0;
				if ((label = (struct sockaddr_rtlabel *)
				    rti_info[RTAX_LABEL]) != NULL) {
					kr->r.rtlabel =
					    rtlabel_name2id(label->sr_label);
					kr->r.ext_tag =
					    rtlabel_id2tag(kr->r.rtlabel);
				}

				if (kif_validate(kr->r.ifindex))
					kr->r.flags &= ~F_DOWN;
				else
					kr->r.flags |= F_DOWN;

				/* just readd, the RDE will care */
				kr_redistribute(okr);
			} else {
add:
				if ((kr = calloc(1,
				    sizeof(struct kroute_node))) == NULL) {
					log_warn("rtmsg_process calloc");
					return (-1);
				}
				kr->r.prefix = prefix;
				kr->r.prefixlen = prefixlen;
				kr->r.nexthop = nexthop;
				kr->r.scope = scope;
				kr->r.flags = flags;
				kr->r.ifindex = ifindex;
				kr->r.priority = prio;

				if (rtm->rtm_priority == kr_state.fib_prio) {
					/*
					 * The route carries our own FIB
					 * priority but has no entry to
					 * update: ask the kernel to delete
					 * it instead of tracking it.
					 */
					log_warnx("alien OSPF route %s/%d",
					    log_in6addr(&prefix), prefixlen);
					rv = send_rtmsg(kr_state.fd,
					    RTM_DELETE, &kr->r);
					free(kr);
					if (rv == -1)
						return (-1);
				} else {
					if ((label = (struct sockaddr_rtlabel *)
					    rti_info[RTAX_LABEL]) != NULL) {
						kr->r.rtlabel =
						    rtlabel_name2id(
						    label->sr_label);
						kr->r.ext_tag =
						    rtlabel_id2tag(
						    kr->r.rtlabel);
					}

					kroute_insert(kr);
				}
			}
			break;
		case RTM_DELETE:
			if ((kr = kroute_find(&prefix, prefixlen, prio)) ==
			    NULL)
				continue;
			/* only entries flagged F_KERNEL are removed here */
			if (!(kr->r.flags & F_KERNEL))
				continue;
			/* get the correct route */
			if (mpath && (kr = kroute_matchgw(kr, &nexthop,
			    scope)) == NULL) {
				log_warnx("rtmsg_process mpath route"
				    " not found");
				return (-1);
			}
			if (kroute_remove(kr) == -1)
				return (-1);
			break;
		case RTM_IFINFO:
			/* copy the header out of the buffer before use */
			memcpy(&ifm, next, sizeof(ifm));
			if_change(ifm.ifm_index, ifm.ifm_flags, &ifm.ifm_data,
			    (struct sockaddr_dl *)rti_info[RTAX_IFP]);
			break;
		case RTM_NEWADDR:
			ifam = (struct ifa_msghdr *)rtm;
			/* nothing to do without any address information */
			if ((ifam->ifam_addrs & (RTA_NETMASK | RTA_IFA |
			    RTA_BRD)) == 0)
				break;

			if_newaddr(ifam->ifam_index,
			    (struct sockaddr_in6 *)rti_info[RTAX_IFA],
			    (struct sockaddr_in6 *)rti_info[RTAX_NETMASK],
			    (struct sockaddr_in6 *)rti_info[RTAX_BRD]);
			break;
		case RTM_DELADDR:
			ifam = (struct ifa_msghdr *)rtm;
			/* nothing to do without any address information */
			if ((ifam->ifam_addrs & (RTA_NETMASK | RTA_IFA |
			    RTA_BRD)) == 0)
				break;

			if_deladdr(ifam->ifam_index,
			    (struct sockaddr_in6 *)rti_info[RTAX_IFA],
			    (struct sockaddr_in6 *)rti_info[RTAX_NETMASK],
			    (struct sockaddr_in6 *)rti_info[RTAX_BRD]);
			break;
		case RTM_IFANNOUNCE:
			if_announce(next);
			break;
		default:
			/* ignore for now */
			break;
		}
	}
	return (offset);
}
1530