xref: /openbsd-src/usr.sbin/ospfd/kroute.c (revision 95d3cd23b2d0e6a67e49f429e41f58010e853126)
1 /*	$OpenBSD: kroute.c,v 1.118 2025/01/01 13:44:52 anton Exp $ */
2 
3 /*
4  * Copyright (c) 2004 Esben Norby <norby@openbsd.org>
5  * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include <sys/types.h>
21 #include <sys/socket.h>
22 #include <sys/sysctl.h>
23 #include <sys/tree.h>
24 #include <sys/uio.h>
25 #include <netinet/in.h>
26 #include <arpa/inet.h>
27 #include <net/if.h>
28 #include <net/if_dl.h>
29 #include <net/if_types.h>
30 #include <net/route.h>
31 #include <err.h>
32 #include <errno.h>
33 #include <fcntl.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include <limits.h>
39 
40 #include "ospfd.h"
41 #include "log.h"
42 
/* Global state of the routing socket and of the FIB synchronisation. */
struct {
	u_int32_t		rtseq;		/* sequence number for outgoing routing messages */
	pid_t			pid;		/* our own pid, recorded by kr_init() */
	int			fib_sync;	/* non-zero while coupled to the kernel FIB */
	int			fib_serial;	/* incremented by every kr_fib_reload() */
	u_int8_t		fib_prio;	/* route priority used for ospfd's routes */
	int			fd;		/* routing socket */
	struct event		ev;		/* read event on fd */
	struct event		reload;		/* reload timer */
	u_int			rdomain;	/* copied into rtm_tableid of sent messages */
#define KR_RELOAD_IDLE	0
#define KR_RELOAD_FETCH	1
#define KR_RELOAD_HOLD	2
	int			reload_state;	/* one of KR_RELOAD_* */
} kr_state;
58 
/*
 * One route.  Only the first route for a given tree key is linked into the
 * RB tree; kroute_insert() appends further routes that compare equal to
 * the list starting at that node's next pointer (multipath).
 */
struct kroute_node {
	RB_ENTRY(kroute_node)	 entry;		/* RB tree linkage */
	struct kroute_node	*next;		/* next multipath entry or NULL */
	struct kroute		 r;		/* the route itself */
	int			 serial;	/* kr_state.fib_serial at insert time */
};
65 
/* One interface, kept in the kif tree and keyed by k.ifindex. */
struct kif_node {
	RB_ENTRY(kif_node)	 entry;		/* RB tree linkage */
	TAILQ_HEAD(, kif_addr)	 addrs;		/* addresses added by if_newaddr() */
	struct kif		 k;		/* interface data */
};
71 
/* redistribution, tree comparison and FIB helpers */
void	kr_redist_remove(struct kroute_node *, struct kroute_node *);
int	kr_redist_eval(struct kroute *, struct kroute *);
void	kr_redistribute(struct kroute_node *);
int	kroute_compare(struct kroute_node *, struct kroute_node *);
int	kif_compare(struct kif_node *, struct kif_node *);
int	kr_change_fib(struct kroute_node *, struct kroute *, int, int);
int	kr_delete_fib(struct kroute_node *);

/* kroute tree management */
struct kroute_node	*kroute_find(in_addr_t, u_int8_t, u_int8_t);
struct kroute_node	*kroute_matchgw(struct kroute_node *, struct in_addr);
int			 kroute_insert(struct kroute_node *);
int			 kroute_remove(struct kroute_node *);
void			 kroute_clear(void);

/* interface tree management */
struct kif_node		*kif_find(u_short);
struct kif_node		*kif_insert(u_short);
int			 kif_remove(struct kif_node *);
struct kif		*kif_update(u_short, int, struct if_data *,
			    struct sockaddr_dl *);
int			 kif_validate(u_short);

/* longest-prefix lookup */
struct kroute_node	*kroute_match(in_addr_t);

/* misc helpers and interface event handlers */
int		protect_lo(void);
u_int8_t	prefixlen_classful(in_addr_t);
void		get_rtaddrs(int, struct sockaddr *, struct sockaddr **);
void		if_change(u_short, int, struct if_data *, struct sockaddr_dl *);
void		if_newaddr(u_short, struct sockaddr_in *, struct sockaddr_in *,
		    struct sockaddr_in *);
void		if_deladdr(u_short, struct sockaddr_in *, struct sockaddr_in *,
		    struct sockaddr_in *);
void		if_announce(void *);

/* routing socket handling and reload timer */
int		send_rtmsg(int, int, struct kroute *);
int		dispatch_rtmsg(void);
int		fetchtable(void);
int		fetchifs(u_short);
int		rtmsg_process(char *, size_t);
void		kr_fib_reload_timer(int, short, void *);
void		kr_fib_reload_arm_timer(int);
112 
/* route tree, ordered by kroute_compare() */
RB_HEAD(kroute_tree, kroute_node)	krt = RB_INITIALIZER(&krt);
RB_PROTOTYPE(kroute_tree, kroute_node, entry, kroute_compare)
RB_GENERATE(kroute_tree, kroute_node, entry, kroute_compare)

/* interface tree, ordered by kif_compare() */
RB_HEAD(kif_tree, kif_node)		kit = RB_INITIALIZER(&kit);
RB_PROTOTYPE(kif_tree, kif_node, entry, kif_compare)
RB_GENERATE(kif_tree, kif_node, entry, kif_compare)
120 
/*
 * Load the interface list by calling fetchifs(0).
 * Returns 0 on success and -1 if fetchifs() reported a failure.
 */
int
kif_init(void)
{
	return (fetchifs(0) == -1 ? -1 : 0);
}
129 
130 int
131 kr_init(int fs, u_int rdomain, int redis_label_or_prefix, u_int8_t fib_prio)
132 {
133 	int		opt = 0, rcvbuf, default_rcvbuf;
134 	socklen_t	optlen;
135 	int		filter_prio = fib_prio;
136 	int		filter_flags = RTF_LLINFO | RTF_BROADCAST;
137 
138 	kr_state.fib_sync = fs;
139 	kr_state.rdomain = rdomain;
140 	kr_state.fib_prio = fib_prio;
141 
142 	if ((kr_state.fd = socket(AF_ROUTE,
143 	    SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK, AF_INET)) == -1) {
144 		log_warn("kr_init: socket");
145 		return (-1);
146 	}
147 
148 	/* not interested in my own messages */
149 	if (setsockopt(kr_state.fd, SOL_SOCKET, SO_USELOOPBACK,
150 	    &opt, sizeof(opt)) == -1)
151 		log_warn("kr_init: setsockopt");	/* not fatal */
152 
153 	if (redis_label_or_prefix) {
154 		filter_prio = 0;
155 		log_info("%s: priority filter disabled", __func__);
156 	} else
157 		log_debug("%s: priority filter enabled", __func__);
158 
159 	if (setsockopt(kr_state.fd, AF_ROUTE, ROUTE_PRIOFILTER, &filter_prio,
160 	    sizeof(filter_prio)) == -1) {
161 		log_warn("%s: setsockopt AF_ROUTE ROUTE_PRIOFILTER", __func__);
162 		/* not fatal */
163 	}
164 	if (setsockopt(kr_state.fd, AF_ROUTE, ROUTE_FLAGFILTER, &filter_flags,
165 	    sizeof(filter_flags)) == -1) {
166 		log_warn("%s: setsockopt AF_ROUTE ROUTE_FLAGFILTER", __func__);
167 		/* not fatal */
168 	}
169 
170 	/* grow receive buffer, don't wanna miss messages */
171 	optlen = sizeof(default_rcvbuf);
172 	if (getsockopt(kr_state.fd, SOL_SOCKET, SO_RCVBUF,
173 	    &default_rcvbuf, &optlen) == -1)
174 		log_warn("kr_init getsockopt SOL_SOCKET SO_RCVBUF");
175 	else
176 		for (rcvbuf = MAX_RTSOCK_BUF;
177 		    rcvbuf > default_rcvbuf &&
178 		    setsockopt(kr_state.fd, SOL_SOCKET, SO_RCVBUF,
179 		    &rcvbuf, sizeof(rcvbuf)) == -1 && errno == ENOBUFS;
180 		    rcvbuf /= 2)
181 			;	/* nothing */
182 
183 	kr_state.pid = getpid();
184 	kr_state.rtseq = 1;
185 
186 	if (fetchtable() == -1)
187 		return (-1);
188 
189 	if (protect_lo() == -1)
190 		return (-1);
191 
192 	event_set(&kr_state.ev, kr_state.fd, EV_READ | EV_PERSIST,
193 	    kr_dispatch_msg, NULL);
194 	event_add(&kr_state.ev, NULL);
195 
196 	kr_state.reload_state = KR_RELOAD_IDLE;
197 	evtimer_set(&kr_state.reload, kr_fib_reload_timer, NULL);
198 
199 	return (0);
200 }
201 
/*
 * Push a set of paths for one prefix into the kernel and mirror the result
 * in the local kroute tree.
 *
 * kr      - head of the multipath list found for the prefix at ospfd's
 *           priority, or NULL if there is none
 * kroute  - array of krcount paths that should exist afterwards
 * krcount - number of entries in kroute
 * action  - RTM_ADD or RTM_CHANGE for the first path that is sent; every
 *           later path is handled as RTM_ADD
 *
 * Returns 0 on success and -1 as soon as send_rtmsg() fails.
 */
int
kr_change_fib(struct kroute_node *kr, struct kroute *kroute, int krcount,
    int action)
{
	int			 i;
	struct kroute_node	*kn, *nkn;

	if (action == RTM_ADD) {
		/*
		 * First remove all stale multipath routes.
		 * This step must be skipped when the action is RTM_CHANGE
		 * because it is already a single path route that will be
		 * changed.
		 */
		for (kn = kr; kn != NULL; kn = nkn) {
			/* stale == nexthop not among the requested paths */
			for (i = 0; i < krcount; i++) {
				if (kn->r.nexthop.s_addr ==
				    kroute[i].nexthop.s_addr)
					break;
			}
			/* remember the successor, kn may be freed below */
			nkn = kn->next;
			if (i == krcount) {
				/* stale route */
				if (kr_delete_fib(kn) == -1)
					log_warnx("kr_delete_fib failed");
				/*
				 * if head element was removed we need to adjust
				 * the head
				 */
				if (kr == kn)
					kr = nkn;
			}
		}
	}

	/*
	 * now add or change the route
	 */
	for (i = 0; i < krcount; i++) {
		/* nexthop within 127/8 -> ignore silently */
		if ((kroute[i].nexthop.s_addr & htonl(IN_CLASSA_NET)) ==
		    htonl(INADDR_LOOPBACK & IN_CLASSA_NET))
			continue;

		if (action == RTM_ADD && kr) {
			for (kn = kr; kn != NULL; kn = kn->next) {
				if (kn->r.nexthop.s_addr ==
				    kroute[i].nexthop.s_addr)
					break;
			}

			if (kn != NULL)
				/* nexthop already present, skip it */
				continue;
		} else
			/* modify first entry */
			kn = kr;

		/* send update */
		if (send_rtmsg(kr_state.fd, action, &kroute[i]) == -1)
			return (-1);

		/* create new entry unless we are changing the first entry */
		if (action == RTM_ADD)
			if ((kn = calloc(1, sizeof(*kn))) == NULL)
				fatal(NULL);

		/* copy the path into the new or the changed node */
		kn->r.prefix.s_addr = kroute[i].prefix.s_addr;
		kn->r.prefixlen = kroute[i].prefixlen;
		kn->r.nexthop.s_addr = kroute[i].nexthop.s_addr;
		kn->r.flags = kroute[i].flags | F_OSPFD_INSERTED;
		kn->r.priority = kr_state.fib_prio;
		kn->r.ext_tag = kroute[i].ext_tag;
		rtlabel_unref(kn->r.rtlabel);	/* for RTM_CHANGE */
		kn->r.rtlabel = kroute[i].rtlabel;

		if (action == RTM_ADD)
			if (kroute_insert(kn) == -1) {
				log_debug("kr_update_fib: cannot insert %s",
				    inet_ntoa(kn->r.nexthop));
				free(kn);
			}
		/* only the first path sent may be an RTM_CHANGE */
		action = RTM_ADD;
	}
	return  (0);
}
288 
289 int
290 kr_change(struct kroute *kroute, int krcount)
291 {
292 	struct kroute_node	*kr;
293 	int			 action = RTM_ADD;
294 
295 	kroute->rtlabel = rtlabel_tag2id(kroute->ext_tag);
296 
297 	kr = kroute_find(kroute->prefix.s_addr, kroute->prefixlen,
298 	    kr_state.fib_prio);
299 	if (kr != NULL && kr->next == NULL && krcount == 1)
300 		/* single path OSPF route */
301 		action = RTM_CHANGE;
302 
303 	return (kr_change_fib(kr, kroute, krcount, action));
304 }
305 
306 int
307 kr_delete_fib(struct kroute_node *kr)
308 {
309 	if (kr->r.priority != kr_state.fib_prio)
310 		log_warn("kr_delete_fib: %s/%d has wrong priority %d",
311 		    inet_ntoa(kr->r.prefix), kr->r.prefixlen, kr->r.priority);
312 
313 	if (send_rtmsg(kr_state.fd, RTM_DELETE, &kr->r) == -1)
314 		return (-1);
315 
316 	if (kroute_remove(kr) == -1)
317 		return (-1);
318 
319 	return (0);
320 }
321 
322 int
323 kr_delete(struct kroute *kroute)
324 {
325 	struct kroute_node	*kr, *nkr;
326 
327 	if ((kr = kroute_find(kroute->prefix.s_addr, kroute->prefixlen,
328 	    kr_state.fib_prio)) == NULL)
329 		return (0);
330 
331 	while (kr != NULL) {
332 		nkr = kr->next;
333 		if (kr_delete_fib(kr) == -1)
334 			return (-1);
335 		kr = nkr;
336 	}
337 	return (0);
338 }
339 
/*
 * Tear down the kroute module: decouple from the kernel FIB first (which
 * sends deletes for ospfd's routes while still coupled), then free the
 * route tree and the interface tree.
 */
void
kr_shutdown(void)
{
	kr_fib_decouple();
	kroute_clear();
	kif_clear();
}
347 
348 void
349 kr_fib_couple(void)
350 {
351 	struct kroute_node	*kr;
352 	struct kroute_node	*kn;
353 
354 	if (kr_state.fib_sync == 1)	/* already coupled */
355 		return;
356 
357 	kr_state.fib_sync = 1;
358 
359 	RB_FOREACH(kr, kroute_tree, &krt)
360 		if (kr->r.priority == kr_state.fib_prio)
361 			for (kn = kr; kn != NULL; kn = kn->next)
362 				send_rtmsg(kr_state.fd, RTM_ADD, &kn->r);
363 
364 	log_info("kernel routing table coupled");
365 }
366 
367 void
368 kr_fib_decouple(void)
369 {
370 	struct kroute_node	*kr;
371 	struct kroute_node	*kn;
372 
373 	if (kr_state.fib_sync == 0)	/* already decoupled */
374 		return;
375 
376 	RB_FOREACH(kr, kroute_tree, &krt)
377 		if (kr->r.priority == kr_state.fib_prio)
378 			for (kn = kr; kn != NULL; kn = kn->next)
379 				send_rtmsg(kr_state.fd, RTM_DELETE, &kn->r);
380 
381 	kr_state.fib_sync = 0;
382 
383 	log_info("kernel routing table decoupled");
384 }
385 
386 void
387 kr_fib_reload_timer(int fd, short event, void *bula)
388 {
389 	if (kr_state.reload_state == KR_RELOAD_FETCH) {
390 		kr_fib_reload();
391 		kr_state.reload_state = KR_RELOAD_HOLD;
392 		kr_fib_reload_arm_timer(KR_RELOAD_HOLD_TIMER);
393 	} else {
394 		kr_state.reload_state = KR_RELOAD_IDLE;
395 	}
396 }
397 
398 void
399 kr_fib_reload_arm_timer(int delay)
400 {
401 	struct timeval		 tv;
402 
403 	timerclear(&tv);
404 	tv.tv_sec = delay / 1000;
405 	tv.tv_usec = (delay % 1000) * 1000;
406 
407 	if (evtimer_add(&kr_state.reload, &tv) == -1)
408 		fatal("add_reload_timer");
409 }
410 
411 void
412 kr_fib_reload(void)
413 {
414 	struct kroute_node	*krn, *kr, *kn;
415 
416 	log_info("reloading interface list and routing table");
417 
418 	kr_state.fib_serial++;
419 
420 	if (fetchifs(0) == -1 || fetchtable() == -1)
421 		return;
422 
423 	for (kr = RB_MIN(kroute_tree, &krt); kr != NULL; kr = krn) {
424 		krn = RB_NEXT(kroute_tree, &krt, kr);
425 
426 		do {
427 			kn = kr->next;
428 
429 			if (kr->serial != kr_state.fib_serial) {
430 				if (kr->r.priority == kr_state.fib_prio) {
431 					kr->serial = kr_state.fib_serial;
432 					if (send_rtmsg(kr_state.fd,
433 					    RTM_ADD, &kr->r) != 0)
434 						break;
435 				} else
436 					kroute_remove(kr);
437 			}
438 
439 		} while ((kr = kn) != NULL);
440 	}
441 }
442 
443 void
444 kr_fib_update_prio(u_int8_t fib_prio)
445 {
446 	struct kroute_node      *kr;
447 
448 	RB_FOREACH(kr, kroute_tree, &krt)
449 		if ((kr->r.flags & F_OSPFD_INSERTED))
450 			kr->r.priority = fib_prio;
451 
452 	log_info("fib priority changed from %hhu to %hhu",
453 	    kr_state.fib_prio, fib_prio);
454 
455 	kr_state.fib_prio = fib_prio;
456  }
457 
/*
 * Event callback for the routing socket.  Leaves the event loop when
 * dispatch_rtmsg() reports an error.  The arguments are unused.
 */
void
kr_dispatch_msg(int fd, short event, void *bula)
{
	/* XXX this is stupid */
	if (dispatch_rtmsg() != -1)
		return;

	event_loopexit(NULL);
}
465 
466 void
467 kr_show_route(struct imsg *imsg)
468 {
469 	struct kroute_node	*kr;
470 	struct kroute_node	*kn;
471 	int			 flags;
472 	struct in_addr		 addr;
473 
474 	switch (imsg->hdr.type) {
475 	case IMSG_CTL_KROUTE:
476 		if (imsg->hdr.len != IMSG_HEADER_SIZE + sizeof(flags)) {
477 			log_warnx("kr_show_route: wrong imsg len");
478 			return;
479 		}
480 		memcpy(&flags, imsg->data, sizeof(flags));
481 		RB_FOREACH(kr, kroute_tree, &krt)
482 			if (!flags || kr->r.flags & flags) {
483 				kn = kr;
484 				do {
485 					main_imsg_compose_ospfe(IMSG_CTL_KROUTE,
486 					    imsg->hdr.pid,
487 					    &kn->r, sizeof(kn->r));
488 				} while ((kn = kn->next) != NULL);
489 			}
490 		break;
491 	case IMSG_CTL_KROUTE_ADDR:
492 		if (imsg->hdr.len != IMSG_HEADER_SIZE +
493 		    sizeof(struct in_addr)) {
494 			log_warnx("kr_show_route: wrong imsg len");
495 			return;
496 		}
497 		memcpy(&addr, imsg->data, sizeof(addr));
498 		kr = NULL;
499 		kr = kroute_match(addr.s_addr);
500 		if (kr != NULL)
501 			main_imsg_compose_ospfe(IMSG_CTL_KROUTE, imsg->hdr.pid,
502 			    &kr->r, sizeof(kr->r));
503 		break;
504 	default:
505 		log_debug("kr_show_route: error handling imsg");
506 		break;
507 	}
508 
509 	main_imsg_compose_ospfe(IMSG_CTL_END, imsg->hdr.pid, NULL, 0);
510 }
511 
512 void
513 kr_ifinfo(char *ifname, pid_t pid)
514 {
515 	struct kif_node	*kif;
516 
517 	RB_FOREACH(kif, kif_tree, &kit)
518 		if (ifname == NULL || !strcmp(ifname, kif->k.ifname)) {
519 			main_imsg_compose_ospfe(IMSG_CTL_IFINFO,
520 			    pid, &kif->k, sizeof(kif->k));
521 		}
522 
523 	main_imsg_compose_ospfe(IMSG_CTL_END, pid, NULL, 0);
524 }
525 
526 void
527 kr_redist_remove(struct kroute_node *kh, struct kroute_node *kn)
528 {
529 	struct kroute	*kr;
530 
531 	/* was the route redistributed? */
532 	if ((kn->r.flags & F_REDISTRIBUTED) == 0)
533 		return;
534 
535 	/* remove redistributed flag */
536 	kn->r.flags &= ~F_REDISTRIBUTED;
537 	kr = &kn->r;
538 
539 	/* probably inform the RDE (check if no other path is redistributed) */
540 	for (kn = kh; kn; kn = kn->next)
541 		if (kn->r.flags & F_REDISTRIBUTED)
542 			break;
543 
544 	if (kn == NULL)
545 		main_imsg_compose_rde(IMSG_NETWORK_DEL, 0, kr,
546 		    sizeof(struct kroute));
547 }
548 
549 int
550 kr_redist_eval(struct kroute *kr, struct kroute *new_kr)
551 {
552 	u_int32_t	 a, metric = 0;
553 
554 	/* Only non-ospfd routes are considered for redistribution. */
555 	if (!(kr->flags & F_KERNEL))
556 		goto dont_redistribute;
557 
558 	/* Dynamic routes are not redistributable. */
559 	if (kr->flags & F_DYNAMIC)
560 		goto dont_redistribute;
561 
562 	/* interface is not up and running so don't announce */
563 	if (kr->flags & F_DOWN)
564 		goto dont_redistribute;
565 
566 	/*
567 	 * We consider the loopback net and multicast addresses
568 	 * as not redistributable.
569 	 */
570 	a = ntohl(kr->prefix.s_addr);
571 	if (IN_MULTICAST(a) || (a >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
572 		goto dont_redistribute;
573 	/*
574 	 * Consider networks with nexthop loopback as not redistributable
575 	 * unless it is a reject or blackhole route.
576 	 */
577 	if (kr->nexthop.s_addr == htonl(INADDR_LOOPBACK) &&
578 	    !(kr->flags & (F_BLACKHOLE|F_REJECT)))
579 		goto dont_redistribute;
580 
581 	/* Should we redistribute this route? */
582 	if (!ospf_redistribute(kr, &metric))
583 		goto dont_redistribute;
584 
585 	/* prefix should be redistributed */
586 	kr->flags |= F_REDISTRIBUTED;
587 	/*
588 	 * only one of all multipath routes can be redistributed so
589 	 * redistribute the best one.
590 	 */
591 	if (new_kr->metric > metric) {
592 		*new_kr = *kr;
593 		new_kr->metric = metric;
594 	}
595 
596 	return (1);
597 
598 dont_redistribute:
599 	/* was the route redistributed? */
600 	if ((kr->flags & F_REDISTRIBUTED) == 0)
601 		return (0);
602 
603 	kr->flags &= ~F_REDISTRIBUTED;
604 	return (1);
605 }
606 
607 void
608 kr_redistribute(struct kroute_node *kh)
609 {
610 	struct kroute_node	*kn;
611 	struct kroute		 kr;
612 	int			 redistribute = 0;
613 
614 	/* only the highest prio route can be redistributed */
615 	if (kroute_find(kh->r.prefix.s_addr, kh->r.prefixlen, RTP_ANY) != kh)
616 		return;
617 
618 	bzero(&kr, sizeof(kr));
619 	kr.metric = UINT_MAX;
620 	for (kn = kh; kn; kn = kn->next)
621 		if (kr_redist_eval(&kn->r, &kr))
622 			redistribute = 1;
623 
624 	if (!redistribute)
625 		return;
626 
627 	if (kr.flags & F_REDISTRIBUTED) {
628 		main_imsg_compose_rde(IMSG_NETWORK_ADD, 0, &kr,
629 		    sizeof(struct kroute));
630 	} else {
631 		kr = kh->r;
632 		main_imsg_compose_rde(IMSG_NETWORK_DEL, 0, &kr,
633 		    sizeof(struct kroute));
634 	}
635 }
636 
637 void
638 kr_reload(int redis_label_or_prefix)
639 {
640 	struct kroute_node	*kr, *kn;
641 	u_int32_t		 dummy;
642 	int			 r;
643 	int			 filter_prio = kr_state.fib_prio;
644 
645 	/* update the priority filter */
646 	if (redis_label_or_prefix) {
647 		filter_prio = 0;
648 		log_info("%s: priority filter disabled", __func__);
649 	} else
650 		log_debug("%s: priority filter enabled", __func__);
651 
652 	if (setsockopt(kr_state.fd, AF_ROUTE, ROUTE_PRIOFILTER, &filter_prio,
653 	    sizeof(filter_prio)) == -1) {
654 		log_warn("%s: setsockopt AF_ROUTE ROUTE_PRIOFILTER", __func__);
655 		/* not fatal */
656 	}
657 
658 	/* update redistribute lists */
659 	RB_FOREACH(kr, kroute_tree, &krt) {
660 		for (kn = kr; kn; kn = kn->next) {
661 			r = ospf_redistribute(&kn->r, &dummy);
662 			/*
663 			 * if it is redistributed, redistribute again metric
664 			 * may have changed.
665 			 */
666 			if ((kn->r.flags & F_REDISTRIBUTED && !r) || r)
667 				break;
668 		}
669 		if (kn) {
670 			/*
671 			 * kr_redistribute copes with removes and RDE with
672 			 * duplicates
673 			 */
674 			kr_redistribute(kr);
675 		}
676 	}
677 }
678 
679 /* rb-tree compare */
680 int
681 kroute_compare(struct kroute_node *a, struct kroute_node *b)
682 {
683 	if (ntohl(a->r.prefix.s_addr) < ntohl(b->r.prefix.s_addr))
684 		return (-1);
685 	if (ntohl(a->r.prefix.s_addr) > ntohl(b->r.prefix.s_addr))
686 		return (1);
687 	if (a->r.prefixlen < b->r.prefixlen)
688 		return (-1);
689 	if (a->r.prefixlen > b->r.prefixlen)
690 		return (1);
691 
692 	/* if the priority is RTP_ANY finish on the first address hit */
693 	if (a->r.priority == RTP_ANY || b->r.priority == RTP_ANY)
694 		return (0);
695 	if (a->r.priority < b->r.priority)
696 		return (-1);
697 	if (a->r.priority > b->r.priority)
698 		return (1);
699 	return (0);
700 }
701 
702 int
703 kif_compare(struct kif_node *a, struct kif_node *b)
704 {
705 	return (b->k.ifindex - a->k.ifindex);
706 }
707 
708 /* tree management */
709 struct kroute_node *
710 kroute_find(in_addr_t prefix, u_int8_t prefixlen, u_int8_t prio)
711 {
712 	struct kroute_node	s;
713 	struct kroute_node	*kn, *tmp;
714 
715 	s.r.prefix.s_addr = prefix;
716 	s.r.prefixlen = prefixlen;
717 	s.r.priority = prio;
718 
719 	kn = RB_FIND(kroute_tree, &krt, &s);
720 	if (kn && prio == RTP_ANY) {
721 		tmp = RB_PREV(kroute_tree, &krt, kn);
722 		while (tmp) {
723 			if (kroute_compare(&s, tmp) == 0)
724 				kn = tmp;
725 			else
726 				break;
727 			tmp = RB_PREV(kroute_tree, &krt, kn);
728 		}
729 	}
730 	return (kn);
731 }
732 
733 struct kroute_node *
734 kroute_matchgw(struct kroute_node *kr, struct in_addr nh)
735 {
736 	in_addr_t	nexthop;
737 
738 	nexthop = nh.s_addr;
739 
740 	while (kr) {
741 		if (kr->r.nexthop.s_addr == nexthop)
742 			return (kr);
743 		kr = kr->next;
744 	}
745 
746 	return (NULL);
747 }
748 
749 int
750 kroute_insert(struct kroute_node *kr)
751 {
752 	struct kroute_node	*krm, *krh;
753 
754 	kr->serial = kr_state.fib_serial;
755 
756 	if ((krh = RB_INSERT(kroute_tree, &krt, kr)) != NULL) {
757 		/*
758 		 * Multipath route, add at end of list.
759 		 */
760 		krm = krh;
761 		while (krm->next != NULL)
762 			krm = krm->next;
763 		krm->next = kr;
764 		kr->next = NULL; /* to be sure */
765 	} else
766 		krh = kr;
767 
768 	if (!(kr->r.flags & F_KERNEL)) {
769 		/* don't validate or redistribute ospf route */
770 		kr->r.flags &= ~F_DOWN;
771 		return (0);
772 	}
773 
774 	if (kif_validate(kr->r.ifindex))
775 		kr->r.flags &= ~F_DOWN;
776 	else
777 		kr->r.flags |= F_DOWN;
778 
779 	kr_redistribute(krh);
780 	return (0);
781 }
782 
783 int
784 kroute_remove(struct kroute_node *kr)
785 {
786 	struct kroute_node	*krm;
787 
788 	if ((krm = RB_FIND(kroute_tree, &krt, kr)) == NULL) {
789 		log_warnx("kroute_remove failed to find %s/%u",
790 		    inet_ntoa(kr->r.prefix), kr->r.prefixlen);
791 		return (-1);
792 	}
793 
794 	if (krm == kr) {
795 		/* head element */
796 		if (RB_REMOVE(kroute_tree, &krt, kr) == NULL) {
797 			log_warnx("kroute_remove failed for %s/%u",
798 			    inet_ntoa(kr->r.prefix), kr->r.prefixlen);
799 			return (-1);
800 		}
801 		if (kr->next != NULL) {
802 			if (RB_INSERT(kroute_tree, &krt, kr->next) != NULL) {
803 				log_warnx("kroute_remove failed to add %s/%u",
804 				    inet_ntoa(kr->r.prefix), kr->r.prefixlen);
805 				return (-1);
806 			}
807 		}
808 	} else {
809 		/* somewhere in the list */
810 		while (krm->next != kr && krm->next != NULL)
811 			krm = krm->next;
812 		if (krm->next == NULL) {
813 			log_warnx("kroute_remove multipath list corrupted "
814 			    "for %s/%u", inet_ntoa(kr->r.prefix),
815 			    kr->r.prefixlen);
816 			return (-1);
817 		}
818 		krm->next = kr->next;
819 	}
820 
821 	kr_redist_remove(krm, kr);
822 	rtlabel_unref(kr->r.rtlabel);
823 
824 	free(kr);
825 	return (0);
826 }
827 
828 void
829 kroute_clear(void)
830 {
831 	struct kroute_node	*kr;
832 
833 	while ((kr = RB_MIN(kroute_tree, &krt)) != NULL)
834 		kroute_remove(kr);
835 }
836 
837 struct kif_node *
838 kif_find(u_short ifindex)
839 {
840 	struct kif_node	s;
841 
842 	bzero(&s, sizeof(s));
843 	s.k.ifindex = ifindex;
844 
845 	return (RB_FIND(kif_tree, &kit, &s));
846 }
847 
848 struct kif *
849 kif_findname(char *ifname, struct in_addr addr, struct kif_addr **kap)
850 {
851 	struct kif_node	*kif;
852 	struct kif_addr	*ka;
853 
854 	RB_FOREACH(kif, kif_tree, &kit)
855 		if (!strcmp(ifname, kif->k.ifname)) {
856 			ka = TAILQ_FIRST(&kif->addrs);
857 			if (addr.s_addr != 0) {
858 				TAILQ_FOREACH(ka, &kif->addrs, entry) {
859 					if (addr.s_addr == ka->addr.s_addr)
860 						break;
861 				}
862 			}
863 			if (kap != NULL)
864 				*kap = ka;
865 			return (&kif->k);
866 		}
867 
868 	return (NULL);
869 }
870 
871 struct kif_node *
872 kif_insert(u_short ifindex)
873 {
874 	struct kif_node	*kif;
875 
876 	if ((kif = calloc(1, sizeof(struct kif_node))) == NULL)
877 		return (NULL);
878 
879 	kif->k.ifindex = ifindex;
880 	TAILQ_INIT(&kif->addrs);
881 
882 	if (RB_INSERT(kif_tree, &kit, kif) != NULL)
883 		fatalx("kif_insert: RB_INSERT");
884 
885 	return (kif);
886 }
887 
888 int
889 kif_remove(struct kif_node *kif)
890 {
891 	struct kif_addr	*ka;
892 
893 	if (RB_REMOVE(kif_tree, &kit, kif) == NULL) {
894 		log_warnx("RB_REMOVE(kif_tree, &kit, kif)");
895 		return (-1);
896 	}
897 
898 	while ((ka = TAILQ_FIRST(&kif->addrs)) != NULL) {
899 		TAILQ_REMOVE(&kif->addrs, ka, entry);
900 		free(ka);
901 	}
902 	free(kif);
903 	return (0);
904 }
905 
906 void
907 kif_clear(void)
908 {
909 	struct kif_node	*kif;
910 
911 	while ((kif = RB_MIN(kif_tree, &kit)) != NULL)
912 		kif_remove(kif);
913 }
914 
915 struct kif *
916 kif_update(u_short ifindex, int flags, struct if_data *ifd,
917     struct sockaddr_dl *sdl)
918 {
919 	struct kif_node		*kif;
920 
921 	if ((kif = kif_find(ifindex)) == NULL) {
922 		if ((kif = kif_insert(ifindex)) == NULL)
923 			return (NULL);
924 		kif->k.nh_reachable = (flags & IFF_UP) &&
925 		    LINK_STATE_IS_UP(ifd->ifi_link_state);
926 	}
927 
928 	kif->k.flags = flags;
929 	kif->k.link_state = ifd->ifi_link_state;
930 	kif->k.if_type = ifd->ifi_type;
931 	kif->k.baudrate = ifd->ifi_baudrate;
932 	kif->k.mtu = ifd->ifi_mtu;
933 	kif->k.rdomain = ifd->ifi_rdomain;
934 
935 	if (sdl && sdl->sdl_family == AF_LINK) {
936 		if (sdl->sdl_nlen >= sizeof(kif->k.ifname))
937 			memcpy(kif->k.ifname, sdl->sdl_data,
938 			    sizeof(kif->k.ifname) - 1);
939 		else if (sdl->sdl_nlen > 0)
940 			memcpy(kif->k.ifname, sdl->sdl_data,
941 			    sdl->sdl_nlen);
942 		/* string already terminated via calloc() */
943 	}
944 
945 	return (&kif->k);
946 }
947 
948 int
949 kif_validate(u_short ifindex)
950 {
951 	struct kif_node		*kif;
952 
953 	if ((kif = kif_find(ifindex)) == NULL) {
954 		log_warnx("interface with index %u not found", ifindex);
955 		return (1);
956 	}
957 
958 	return (kif->k.nh_reachable);
959 }
960 
961 struct kroute_node *
962 kroute_match(in_addr_t key)
963 {
964 	int			 i;
965 	struct kroute_node	*kr;
966 
967 	/* we will never match the default route */
968 	for (i = 32; i > 0; i--)
969 		if ((kr = kroute_find(key & prefixlen2mask(i), i,
970 		    RTP_ANY)) != NULL)
971 			return (kr);
972 
973 	/* if we don't have a match yet, try to find a default route */
974 	if ((kr = kroute_find(0, 0, RTP_ANY)) != NULL)
975 			return (kr);
976 
977 	return (NULL);
978 }
979 
980 /* misc */
981 int
982 protect_lo(void)
983 {
984 	struct kroute_node	*kr;
985 
986 	/* special protection for 127/8 */
987 	if ((kr = calloc(1, sizeof(struct kroute_node))) == NULL) {
988 		log_warn("protect_lo");
989 		return (-1);
990 	}
991 	kr->r.prefix.s_addr = htonl(INADDR_LOOPBACK & IN_CLASSA_NET);
992 	kr->r.prefixlen = 8;
993 	kr->r.flags = F_KERNEL|F_CONNECTED;
994 
995 	if (RB_INSERT(kroute_tree, &krt, kr) != NULL)
996 		free(kr);	/* kernel route already there, no problem */
997 
998 	return (0);
999 }
1000 
/*
 * Return the classful prefix length for an address.  The comparison is
 * done on the numeric value of ina as passed in.
 *
 *   class E (>= 0xf0000000) -> 32
 *   class D (>= 0xe0000000) ->  4
 *   class C (>= 0xc0000000) -> 24
 *   class B (>= 0x80000000) -> 16
 *   class A (everything else) -> 8
 */
u_int8_t
prefixlen_classful(in_addr_t ina)
{
	/* it hurt to write this. */

	/* the class is fully determined by the top four bits */
	switch (ina >> 28) {
	case 0xf:			/* class E */
		return (32);
	case 0xe:			/* class D */
		return (4);
	case 0xc:			/* class C */
	case 0xd:
		return (24);
	case 0x8:			/* class B */
	case 0x9:
	case 0xa:
	case 0xb:
		return (16);
	default:			/* class A */
		return (8);
	}
}
1017 
/*
 * Convert a netmask in network byte order into a prefix length.
 *
 * The result is 32 minus the number of trailing zero bits of the mask in
 * host byte order; an all-zero mask gives 0.
 */
u_int8_t
mask2prefixlen(in_addr_t ina)
{
	u_int32_t	 hostmask = ntohl(ina);
	u_int8_t	 zeros = 0;

	if (hostmask == 0)
		return (0);

	/* count the zero bits below the lowest set bit */
	while ((hostmask & 1) == 0) {
		hostmask >>= 1;
		zeros++;
	}

	return (32 - zeros);
}
1026 
/*
 * Convert a prefix length into a netmask in network byte order.
 *
 * A prefix length of 0 gives an all-zero mask.  Lengths of 32 and above
 * give an all-ones mask: without this clamp a length above 32 would shift
 * by a negative amount, which is undefined behaviour.
 */
in_addr_t
prefixlen2mask(u_int8_t prefixlen)
{
	if (prefixlen == 0)
		return (0);

	if (prefixlen >= 32)
		return (htonl(0xffffffffU));

	return (htonl(0xffffffffU << (32 - prefixlen)));
}
1035 
/*
 * Round a up to the next multiple of sizeof(long); values that are not
 * positive yield sizeof(long).
 */
#define ROUNDUP(a) \
	((a) > 0 ? (((a) + (sizeof(long) - 1)) & ~(sizeof(long) - 1)) : \
	    sizeof(long))
1038 
1039 void
1040 get_rtaddrs(int addrs, struct sockaddr *sa, struct sockaddr **rti_info)
1041 {
1042 	int	i;
1043 
1044 	for (i = 0; i < RTAX_MAX; i++) {
1045 		if (addrs & (1 << i)) {
1046 			rti_info[i] = sa;
1047 			sa = (struct sockaddr *)((char *)(sa) +
1048 			    ROUNDUP(sa->sa_len));
1049 		} else
1050 			rti_info[i] = NULL;
1051 	}
1052 }
1053 
1054 void
1055 if_change(u_short ifindex, int flags, struct if_data *ifd,
1056     struct sockaddr_dl *sdl)
1057 {
1058 	struct kroute_node	*kr, *tkr;
1059 	struct kif		*kif;
1060 	u_int8_t		 reachable;
1061 
1062 	if ((kif = kif_update(ifindex, flags, ifd, sdl)) == NULL) {
1063 		log_warn("if_change:  kif_update(%u)", ifindex);
1064 		return;
1065 	}
1066 
1067 	/* notify ospfe about interface link state */
1068 	main_imsg_compose_ospfe(IMSG_IFINFO, 0, kif, sizeof(struct kif));
1069 
1070 	reachable = (kif->flags & IFF_UP) &&
1071 	    LINK_STATE_IS_UP(kif->link_state);
1072 
1073 	if (reachable == kif->nh_reachable)
1074 		return;		/* nothing changed wrt nexthop validity */
1075 
1076 	kif->nh_reachable = reachable;
1077 
1078 	/* update redistribute list */
1079 	RB_FOREACH(kr, kroute_tree, &krt) {
1080 		for (tkr = kr; tkr != NULL; tkr = tkr->next) {
1081 			if (tkr->r.ifindex == ifindex) {
1082 				if (reachable)
1083 					tkr->r.flags &= ~F_DOWN;
1084 				else
1085 					tkr->r.flags |= F_DOWN;
1086 
1087 			}
1088 		}
1089 		kr_redistribute(kr);
1090 	}
1091 }
1092 
1093 void
1094 if_newaddr(u_short ifindex, struct sockaddr_in *ifa, struct sockaddr_in *mask,
1095     struct sockaddr_in *brd)
1096 {
1097 	struct kif_node 	*kif;
1098 	struct kif_addr 	*ka;
1099 	struct ifaddrchange	 ifn;
1100 
1101 	if (ifa == NULL || ifa->sin_family != AF_INET)
1102 		return;
1103 	if ((kif = kif_find(ifindex)) == NULL) {
1104 		log_warnx("if_newaddr: corresponding if %d not found", ifindex);
1105 		return;
1106 	}
1107 	if ((ka = calloc(1, sizeof(struct kif_addr))) == NULL)
1108 		fatal("if_newaddr");
1109 	ka->addr = ifa->sin_addr;
1110 	if (mask)
1111 		ka->mask = mask->sin_addr;
1112 	else
1113 		ka->mask.s_addr = INADDR_NONE;
1114 	if (brd)
1115 		ka->dstbrd = brd->sin_addr;
1116 	else
1117 		ka->dstbrd.s_addr = INADDR_NONE;
1118 
1119 	TAILQ_INSERT_TAIL(&kif->addrs, ka, entry);
1120 
1121 	ifn.addr = ka->addr;
1122 	ifn.mask = ka->mask;
1123 	ifn.dst = ka->dstbrd;
1124 	ifn.ifindex = ifindex;
1125 	main_imsg_compose_ospfe(IMSG_IFADDRADD, 0, &ifn, sizeof(ifn));
1126 }
1127 
1128 void
1129 if_deladdr(u_short ifindex, struct sockaddr_in *ifa, struct sockaddr_in *mask,
1130     struct sockaddr_in *brd)
1131 {
1132 	struct kif_node 	*kif;
1133 	struct kif_addr		*ka, *nka;
1134 	struct ifaddrchange	 ifc;
1135 
1136 	if (ifa == NULL || ifa->sin_family != AF_INET)
1137 		return;
1138 	if ((kif = kif_find(ifindex)) == NULL) {
1139 		log_warnx("if_deladdr: corresponding if %d not found", ifindex);
1140 		return;
1141 	}
1142 
1143 	for (ka = TAILQ_FIRST(&kif->addrs); ka != NULL; ka = nka) {
1144 		nka = TAILQ_NEXT(ka, entry);
1145 
1146 		if (ka->addr.s_addr == ifa->sin_addr.s_addr) {
1147 			TAILQ_REMOVE(&kif->addrs, ka, entry);
1148 			ifc.addr = ifa->sin_addr;
1149 			ifc.ifindex = ifindex;
1150 			main_imsg_compose_ospfe(IMSG_IFADDRDEL, 0, &ifc,
1151 			    sizeof(ifc));
1152 			free(ka);
1153 			return;
1154 		}
1155 	}
1156 }
1157 
1158 void
1159 if_announce(void *msg)
1160 {
1161 	struct if_announcemsghdr	*ifan;
1162 	struct kif_node			*kif;
1163 
1164 	ifan = msg;
1165 
1166 	switch (ifan->ifan_what) {
1167 	case IFAN_ARRIVAL:
1168 		kif = kif_insert(ifan->ifan_index);
1169 		strlcpy(kif->k.ifname, ifan->ifan_name, sizeof(kif->k.ifname));
1170 		break;
1171 	case IFAN_DEPARTURE:
1172 		kif = kif_find(ifan->ifan_index);
1173 		if (kif != NULL)
1174 			kif_remove(kif);
1175 		break;
1176 	}
1177 }
1178 
1179 /* rtsock */
1180 int
1181 send_rtmsg(int fd, int action, struct kroute *kroute)
1182 {
1183 	struct iovec		iov[5];
1184 	struct rt_msghdr	hdr;
1185 	struct sockaddr_in	prefix;
1186 	struct sockaddr_in	nexthop;
1187 	struct sockaddr_in	mask;
1188 	struct sockaddr_rtlabel	sa_rl;
1189 	int			iovcnt = 0;
1190 	const char		*label;
1191 
1192 	if (kr_state.fib_sync == 0)
1193 		return (0);
1194 
1195 	/* initialize header */
1196 	bzero(&hdr, sizeof(hdr));
1197 	hdr.rtm_version = RTM_VERSION;
1198 	hdr.rtm_type = action;
1199 	hdr.rtm_priority = kr_state.fib_prio;
1200 	hdr.rtm_tableid = kr_state.rdomain;	/* rtableid */
1201 	if (action == RTM_CHANGE)
1202 		hdr.rtm_fmask = RTF_REJECT|RTF_BLACKHOLE;
1203 	else
1204 		hdr.rtm_flags = RTF_MPATH;
1205 	hdr.rtm_seq = kr_state.rtseq++;	/* overflow doesn't matter */
1206 	hdr.rtm_msglen = sizeof(hdr);
1207 	/* adjust iovec */
1208 	iov[iovcnt].iov_base = &hdr;
1209 	iov[iovcnt++].iov_len = sizeof(hdr);
1210 
1211 	bzero(&prefix, sizeof(prefix));
1212 	prefix.sin_len = sizeof(prefix);
1213 	prefix.sin_family = AF_INET;
1214 	prefix.sin_addr.s_addr = kroute->prefix.s_addr;
1215 	/* adjust header */
1216 	hdr.rtm_addrs |= RTA_DST;
1217 	hdr.rtm_msglen += sizeof(prefix);
1218 	/* adjust iovec */
1219 	iov[iovcnt].iov_base = &prefix;
1220 	iov[iovcnt++].iov_len = sizeof(prefix);
1221 
1222 	if (kroute->nexthop.s_addr != 0) {
1223 		bzero(&nexthop, sizeof(nexthop));
1224 		nexthop.sin_len = sizeof(nexthop);
1225 		nexthop.sin_family = AF_INET;
1226 		nexthop.sin_addr.s_addr = kroute->nexthop.s_addr;
1227 		/* adjust header */
1228 		hdr.rtm_flags |= RTF_GATEWAY;
1229 		hdr.rtm_addrs |= RTA_GATEWAY;
1230 		hdr.rtm_msglen += sizeof(nexthop);
1231 		/* adjust iovec */
1232 		iov[iovcnt].iov_base = &nexthop;
1233 		iov[iovcnt++].iov_len = sizeof(nexthop);
1234 	}
1235 
1236 	bzero(&mask, sizeof(mask));
1237 	mask.sin_len = sizeof(mask);
1238 	mask.sin_family = AF_INET;
1239 	mask.sin_addr.s_addr = prefixlen2mask(kroute->prefixlen);
1240 	/* adjust header */
1241 	hdr.rtm_addrs |= RTA_NETMASK;
1242 	hdr.rtm_msglen += sizeof(mask);
1243 	/* adjust iovec */
1244 	iov[iovcnt].iov_base = &mask;
1245 	iov[iovcnt++].iov_len = sizeof(mask);
1246 
1247 	if (kroute->rtlabel != 0) {
1248 		sa_rl.sr_len = sizeof(sa_rl);
1249 		sa_rl.sr_family = AF_UNSPEC;
1250 		label = rtlabel_id2name(kroute->rtlabel);
1251 		if (strlcpy(sa_rl.sr_label, label,
1252 		    sizeof(sa_rl.sr_label)) >= sizeof(sa_rl.sr_label)) {
1253 			log_warnx("send_rtmsg: invalid rtlabel");
1254 			return (-1);
1255 		}
1256 		/* adjust header */
1257 		hdr.rtm_addrs |= RTA_LABEL;
1258 		hdr.rtm_msglen += sizeof(sa_rl);
1259 		/* adjust iovec */
1260 		iov[iovcnt].iov_base = &sa_rl;
1261 		iov[iovcnt++].iov_len = sizeof(sa_rl);
1262 	}
1263 
1264 retry:
1265 	if (writev(fd, iov, iovcnt) == -1) {
1266 		if (errno == ESRCH) {
1267 			if (hdr.rtm_type == RTM_CHANGE) {
1268 				hdr.rtm_type = RTM_ADD;
1269 				goto retry;
1270 			} else if (hdr.rtm_type == RTM_DELETE) {
1271 				log_info("route %s/%u vanished before delete",
1272 				    inet_ntoa(kroute->prefix),
1273 				    kroute->prefixlen);
1274 				return (0);
1275 			}
1276 		}
1277 		log_warn("send_rtmsg: action %u, prefix %s/%u", hdr.rtm_type,
1278 		    inet_ntoa(kroute->prefix), kroute->prefixlen);
1279 		return (0);
1280 	}
1281 
1282 	return (0);
1283 }
1284 
/*
 * Dump the AF_INET routes of the routing table kr_state.rdomain via
 * sysctl(NET_RT_DUMP) and feed the result to rtmsg_process().
 *
 * Returns whatever rtmsg_process() returns, or -1 if the sysctl calls or
 * the buffer allocation fail.
 */
int
fetchtable(void)
{
	int	 mib[7] = { CTL_NET, PF_ROUTE, 0, AF_INET, NET_RT_DUMP, 0, 0 };
	size_t	 needed;
	char	*dump;
	int	 ret;

	mib[6] = kr_state.rdomain;	/* rtableid */

	/* first pass: ask only for the required buffer size */
	if (sysctl(mib, 7, NULL, &needed, NULL, 0) == -1) {
		log_warn("sysctl");
		return (-1);
	}

	dump = malloc(needed);
	if (dump == NULL) {
		log_warn("fetchtable");
		return (-1);
	}

	/* second pass: fetch the actual dump and process it */
	if (sysctl(mib, 7, dump, &needed, NULL, 0) == -1) {
		log_warn("sysctl");
		ret = -1;
	} else
		ret = rtmsg_process(dump, needed);

	free(dump);
	return (ret);
}
1320 
/*
 * Fetch the interface list via sysctl(NET_RT_IFLIST) and feed the result
 * to rtmsg_process().  ifindex is passed through as the last mib element.
 *
 * Returns whatever rtmsg_process() returns, or -1 if the sysctl calls or
 * the buffer allocation fail.
 */
int
fetchifs(u_short ifindex)
{
	int	 mib[6] = { CTL_NET, PF_ROUTE, 0, AF_INET, NET_RT_IFLIST, 0 };
	size_t	 needed;
	char	*dump;
	int	 ret;

	mib[5] = ifindex;

	/* first pass: ask only for the required buffer size */
	if (sysctl(mib, 6, NULL, &needed, NULL, 0) == -1) {
		log_warn("sysctl");
		return (-1);
	}

	dump = malloc(needed);
	if (dump == NULL) {
		log_warn("fetchif");
		return (-1);
	}

	/* second pass: fetch the actual list and process it */
	if (sysctl(mib, 6, dump, &needed, NULL, 0) == -1) {
		log_warn("sysctl");
		ret = -1;
	} else
		ret = rtmsg_process(dump, needed);

	free(dump);
	return (ret);
}
1355 
1356 int
1357 dispatch_rtmsg(void)
1358 {
1359 	char			 buf[RT_BUF_SIZE];
1360 	ssize_t			 n;
1361 
1362 	if ((n = read(kr_state.fd, &buf, sizeof(buf))) == -1) {
1363 		if (errno == EAGAIN || errno == EINTR)
1364 			return (0);
1365 		log_warn("dispatch_rtmsg: read error");
1366 		return (-1);
1367 	}
1368 
1369 	if (n == 0) {
1370 		log_warnx("routing socket closed");
1371 		return (-1);
1372 	}
1373 
1374 	return (rtmsg_process(buf, n));
1375 }
1376 
1377 int
1378 rtmsg_process(char *buf, size_t len)
1379 {
1380 	struct rt_msghdr	*rtm;
1381 	struct if_msghdr	 ifm;
1382 	struct ifa_msghdr	*ifam;
1383 	struct sockaddr		*sa, *rti_info[RTAX_MAX];
1384 	struct sockaddr_in	*sa_in;
1385 	struct sockaddr_rtlabel	*label;
1386 	struct kroute_node	*kr, *okr;
1387 	struct in_addr		 prefix, nexthop;
1388 	u_int8_t		 prefixlen, prio;
1389 	int			 flags, mpath;
1390 	u_short			 ifindex = 0;
1391 	int			 rv, delay;
1392 
1393 	size_t			 offset;
1394 	char			*next;
1395 
1396 	for (offset = 0; offset < len; offset += rtm->rtm_msglen) {
1397 		next = buf + offset;
1398 		rtm = (struct rt_msghdr *)next;
1399 		if (len < offset + sizeof(u_short) ||
1400 		    len < offset + rtm->rtm_msglen)
1401 			fatalx("%s: partial rtm in buffer", __func__);
1402 		if (rtm->rtm_version != RTM_VERSION)
1403 			continue;
1404 
1405 		prefix.s_addr = 0;
1406 		prefixlen = 0;
1407 		nexthop.s_addr = 0;
1408 		mpath = 0;
1409 		prio = 0;
1410 		flags = F_KERNEL;
1411 
1412 		sa = (struct sockaddr *)(next + rtm->rtm_hdrlen);
1413 		get_rtaddrs(rtm->rtm_addrs, sa, rti_info);
1414 
1415 		switch (rtm->rtm_type) {
1416 		case RTM_ADD:
1417 		case RTM_GET:
1418 		case RTM_CHANGE:
1419 		case RTM_DELETE:
1420 			if (rtm->rtm_errno)		/* failed attempts... */
1421 				continue;
1422 
1423 			if (rtm->rtm_tableid != kr_state.rdomain)
1424 				continue;
1425 
1426 			if (rtm->rtm_type == RTM_GET &&
1427 			    rtm->rtm_pid != kr_state.pid)
1428 				continue;
1429 
1430 			if ((sa = rti_info[RTAX_DST]) == NULL)
1431 				continue;
1432 
1433 			/* Skip ARP/ND cache and broadcast routes. */
1434 			if (rtm->rtm_flags & (RTF_LLINFO|RTF_BROADCAST))
1435 				continue;
1436 
1437 			if (rtm->rtm_flags & RTF_MPATH)
1438 				mpath = 1;
1439 			prio = rtm->rtm_priority;
1440 			flags = (prio == kr_state.fib_prio) ?
1441 			    F_OSPFD_INSERTED : F_KERNEL;
1442 
1443 			switch (sa->sa_family) {
1444 			case AF_INET:
1445 				prefix.s_addr =
1446 				    ((struct sockaddr_in *)sa)->sin_addr.s_addr;
1447 				sa_in = (struct sockaddr_in *)
1448 				    rti_info[RTAX_NETMASK];
1449 				if (sa_in != NULL) {
1450 					if (sa_in->sin_len != 0)
1451 						prefixlen = mask2prefixlen(
1452 						    sa_in->sin_addr.s_addr);
1453 				} else if (rtm->rtm_flags & RTF_HOST)
1454 					prefixlen = 32;
1455 				else
1456 					prefixlen =
1457 					    prefixlen_classful(prefix.s_addr);
1458 				if (rtm->rtm_flags & RTF_STATIC)
1459 					flags |= F_STATIC;
1460 				if (rtm->rtm_flags & RTF_BLACKHOLE)
1461 					flags |= F_BLACKHOLE;
1462 				if (rtm->rtm_flags & RTF_REJECT)
1463 					flags |= F_REJECT;
1464 				if (rtm->rtm_flags & RTF_DYNAMIC)
1465 					flags |= F_DYNAMIC;
1466 				break;
1467 			default:
1468 				continue;
1469 			}
1470 
1471 			ifindex = rtm->rtm_index;
1472 			if ((sa = rti_info[RTAX_GATEWAY]) != NULL) {
1473 				switch (sa->sa_family) {
1474 				case AF_INET:
1475 					if (rtm->rtm_flags & RTF_CONNECTED)
1476 						flags |= F_CONNECTED;
1477 
1478 					nexthop.s_addr = ((struct
1479 					    sockaddr_in *)sa)->sin_addr.s_addr;
1480 					break;
1481 				case AF_LINK:
1482 					/*
1483 					 * Traditional BSD connected routes have
1484 					 * a gateway of type AF_LINK.
1485 					 */
1486 					flags |= F_CONNECTED;
1487 					break;
1488 				}
1489 			}
1490 		}
1491 
1492 		switch (rtm->rtm_type) {
1493 		case RTM_ADD:
1494 		case RTM_GET:
1495 		case RTM_CHANGE:
1496 			if (nexthop.s_addr == 0 && !(flags & F_CONNECTED)) {
1497 				log_warnx("no nexthop for %s/%u",
1498 				    inet_ntoa(prefix), prefixlen);
1499 				continue;
1500 			}
1501 
1502 			if ((okr = kroute_find(prefix.s_addr, prefixlen, prio))
1503 			    != NULL) {
1504 				/* get the correct route */
1505 				kr = okr;
1506 				if ((mpath || prio == kr_state.fib_prio) &&
1507 				    (kr = kroute_matchgw(okr, nexthop)) ==
1508 				    NULL) {
1509 					log_warnx("%s: mpath route not found",
1510 					    __func__);
1511 					/* add routes we missed out earlier */
1512 					goto add;
1513 				}
1514 
1515 				if (kr->r.flags & F_REDISTRIBUTED)
1516 					flags |= F_REDISTRIBUTED;
1517 				kr->r.nexthop.s_addr = nexthop.s_addr;
1518 				kr->r.flags = flags;
1519 				kr->r.ifindex = ifindex;
1520 
1521 				rtlabel_unref(kr->r.rtlabel);
1522 				kr->r.rtlabel = 0;
1523 				kr->r.ext_tag = 0;
1524 				if ((label = (struct sockaddr_rtlabel *)
1525 				    rti_info[RTAX_LABEL]) != NULL) {
1526 					kr->r.rtlabel =
1527 					    rtlabel_name2id(label->sr_label);
1528 					kr->r.ext_tag =
1529 					    rtlabel_id2tag(kr->r.rtlabel);
1530 				}
1531 
1532 				if (kif_validate(kr->r.ifindex))
1533 					kr->r.flags &= ~F_DOWN;
1534 				else
1535 					kr->r.flags |= F_DOWN;
1536 
1537 				/* just readd, the RDE will care */
1538 				kr->serial = kr_state.fib_serial;
1539 				kr_redistribute(okr);
1540 			} else {
1541 add:
1542 				if ((kr = calloc(1,
1543 				    sizeof(struct kroute_node))) == NULL) {
1544 					log_warn("%s: calloc", __func__);
1545 					return (-1);
1546 				}
1547 
1548 				kr->r.prefix.s_addr = prefix.s_addr;
1549 				kr->r.prefixlen = prefixlen;
1550 				kr->r.nexthop.s_addr = nexthop.s_addr;
1551 				kr->r.flags = flags;
1552 				kr->r.ifindex = ifindex;
1553 				kr->r.priority = prio;
1554 
1555 				if (rtm->rtm_priority == kr_state.fib_prio) {
1556 					log_warnx("alien OSPF route %s/%d",
1557 					    inet_ntoa(prefix), prefixlen);
1558 					rv = send_rtmsg(kr_state.fd,
1559 					    RTM_DELETE, &kr->r);
1560 					free(kr);
1561 					if (rv == -1)
1562 						return (-1);
1563 				} else {
1564 					if ((label = (struct sockaddr_rtlabel *)
1565 					    rti_info[RTAX_LABEL]) != NULL) {
1566 						kr->r.rtlabel =
1567 						    rtlabel_name2id(
1568 						    label->sr_label);
1569 						kr->r.ext_tag =
1570 						    rtlabel_id2tag(
1571 						    kr->r.rtlabel);
1572 					}
1573 
1574 					kroute_insert(kr);
1575 				}
1576 			}
1577 			break;
1578 		case RTM_DELETE:
1579 			if ((kr = kroute_find(prefix.s_addr, prefixlen, prio))
1580 			    == NULL)
1581 				continue;
1582 			if (!(kr->r.flags & F_KERNEL))
1583 				continue;
1584 			/* get the correct route */
1585 			okr = kr;
1586 			if (mpath &&
1587 			    (kr = kroute_matchgw(kr, nexthop)) == NULL) {
1588 				log_warnx("%s: mpath route not found",
1589 				    __func__);
1590 				return (-1);
1591 			}
1592 			if (kroute_remove(kr) == -1)
1593 				return (-1);
1594 			break;
1595 		case RTM_IFINFO:
1596 			memcpy(&ifm, next, sizeof(ifm));
1597 			if_change(ifm.ifm_index, ifm.ifm_flags, &ifm.ifm_data,
1598 			    (struct sockaddr_dl *)rti_info[RTAX_IFP]);
1599 			break;
1600 		case RTM_NEWADDR:
1601 			ifam = (struct ifa_msghdr *)rtm;
1602 			if ((ifam->ifam_addrs & (RTA_NETMASK | RTA_IFA |
1603 			    RTA_BRD)) == 0)
1604 				break;
1605 
1606 			if_newaddr(ifam->ifam_index,
1607 			    (struct sockaddr_in *)rti_info[RTAX_IFA],
1608 			    (struct sockaddr_in *)rti_info[RTAX_NETMASK],
1609 			    (struct sockaddr_in *)rti_info[RTAX_BRD]);
1610 			break;
1611 		case RTM_DELADDR:
1612 			ifam = (struct ifa_msghdr *)rtm;
1613 			if ((ifam->ifam_addrs & (RTA_NETMASK | RTA_IFA |
1614 			    RTA_BRD)) == 0)
1615 				break;
1616 
1617 			if_deladdr(ifam->ifam_index,
1618 			    (struct sockaddr_in *)rti_info[RTAX_IFA],
1619 			    (struct sockaddr_in *)rti_info[RTAX_NETMASK],
1620 			    (struct sockaddr_in *)rti_info[RTAX_BRD]);
1621 			break;
1622 		case RTM_IFANNOUNCE:
1623 			if_announce(next);
1624 			break;
1625 		case RTM_DESYNC:
1626 			/*
1627 			 * We lost some routing packets. Schedule a reload
1628 			 * of the kernel route/interface information.
1629 			 */
1630 			if (kr_state.reload_state == KR_RELOAD_IDLE) {
1631 				delay = KR_RELOAD_TIMER;
1632 				log_info("desync; scheduling fib reload");
1633 			} else {
1634 				delay = KR_RELOAD_HOLD_TIMER;
1635 				log_debug("desync during KR_RELOAD_%s",
1636 				    kr_state.reload_state ==
1637 				    KR_RELOAD_FETCH ? "FETCH" : "HOLD");
1638 			}
1639 			kr_state.reload_state = KR_RELOAD_FETCH;
1640 			kr_fib_reload_arm_timer(delay);
1641 			break;
1642 		default:
1643 			/* ignore for now */
1644 			break;
1645 		}
1646 	}
1647 
1648 	return (offset);
1649 }
1650