xref: /openbsd-src/usr.sbin/ldpd/kroute.c (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1 /*	$OpenBSD: kroute.c,v 1.61 2016/06/18 01:25:53 renato Exp $ */
2 
3 /*
4  * Copyright (c) 2015, 2016 Renato Westphal <renato@openbsd.org>
5  * Copyright (c) 2009 Michele Marchetto <michele@openbsd.org>
6  * Copyright (c) 2004 Esben Norby <norby@openbsd.org>
7  * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
8  *
9  * Permission to use, copy, modify, and distribute this software for any
10  * purpose with or without fee is hereby granted, provided that the above
11  * copyright notice and this permission notice appear in all copies.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
14  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
15  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
16  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
17  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
18  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
19  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20  */
21 
22 #include <sys/types.h>
23 #include <sys/socket.h>
24 #include <sys/ioctl.h>
25 #include <sys/sysctl.h>
26 #include <arpa/inet.h>
27 #include <net/if_dl.h>
28 #include <net/route.h>
29 #include <netmpls/mpls.h>
30 #include <errno.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <unistd.h>
34 #include <limits.h>
35 
36 #include "ldpd.h"
37 #include "log.h"
38 
/* Process-wide state of the kernel routing interface. */
39 struct {
40 	uint32_t		rtseq;		/* sequence number for outgoing routing messages */
41 	pid_t			pid;		/* our pid, recorded by kr_init() */
42 	int			fib_sync;	/* non-zero while coupled to the kernel FIB */
43 	int			fd;		/* AF_ROUTE routing socket */
44 	int			ioctl_fd;	/* AF_INET datagram socket opened by kr_init() */
45 	struct event		ev;		/* read event registered on fd */
46 } kr_state;
47 
/* One nexthop of a route; lives on its priority's nexthop list. */
48 struct kroute_node {
49 	TAILQ_ENTRY(kroute_node)	 entry;
50 	struct kroute_priority		*kprio;		/* back pointer */
51 	struct kroute			 r;
52 };
53 
/*
 * All nexthops of a prefix that share one route priority.  The owning
 * prefix keeps these sorted with the lowest priority value first.
 */
54 struct kroute_priority {
55 	TAILQ_ENTRY(kroute_priority)	 entry;
56 	struct kroute_prefix		*kp;		/* back pointer */
57 	uint8_t				 priority;
58 	TAILQ_HEAD(, kroute_node)	 nexthops;
59 };
60 
/*
 * A destination prefix, keyed in the kroute tree by address family,
 * prefix and prefix length (see kroute_compare()).
 */
61 struct kroute_prefix {
62 	RB_ENTRY(kroute_prefix)		 entry;
63 	int				 af;
64 	union ldpd_addr			 prefix;
65 	uint8_t				 prefixlen;
66 	TAILQ_HEAD(plist, kroute_priority) priorities;	/* lowest priority value first */
67 };
68 RB_HEAD(kroute_tree, kroute_prefix);
69 RB_PROTOTYPE(kroute_tree, kroute_prefix, entry, kroute_compare)
70 
/* One address configured on an interface; lives on kif_node.addrs. */
71 struct kif_addr {
72 	TAILQ_ENTRY(kif_addr)	 entry;
73 	struct kaddr		 a;
74 };
75 
/* An interface, keyed in the kif tree by interface index (see kif_compare()). */
76 struct kif_node {
77 	RB_ENTRY(kif_node)	 entry;
78 	TAILQ_HEAD(, kif_addr)	 addrs;		/* addresses configured on the interface */
79 	struct kif		 k;
80 	struct kpw		*kpw;		/* passed to kmpw_install() on coupling when set */
81 };
82 RB_HEAD(kif_tree, kif_node);
83 RB_PROTOTYPE(kif_tree, kif_node, entry, kif_compare)
84 
/* Forward declarations of the file-local helpers. */
85 static void		 kr_dispatch_msg(int, short, void *);
86 static void		 kr_redist_remove(struct kroute *);
87 static int		 kr_redist_eval(struct kroute *);
88 static void		 kr_redistribute(struct kroute_prefix *);
89 static __inline int	 kroute_compare(struct kroute_prefix *,
90 			    struct kroute_prefix *);
91 static struct kroute_prefix	*kroute_find_prefix(int, union ldpd_addr *,
92 			    uint8_t);
93 static struct kroute_priority	*kroute_find_prio(struct kroute_prefix *,
94 			    uint8_t);
95 static struct kroute_node	*kroute_find_gw(struct kroute_priority *,
96 				    union ldpd_addr *);
97 static int		 kroute_insert(struct kroute *);
98 static int		 kroute_uninstall(struct kroute_node *);
99 static int		 kroute_remove(struct kroute *);
100 static void		 kroute_clear(void);
101 static __inline int	 kif_compare(struct kif_node *, struct kif_node *);
102 static struct kif_node	*kif_find(unsigned short);
103 static struct kif_node	*kif_insert(unsigned short);
104 static int		 kif_remove(struct kif_node *);
105 static struct kif_node	*kif_update(unsigned short, int, struct if_data *,
106 			    struct sockaddr_dl *, int *);
107 static struct kroute_priority	*kroute_match(int, union ldpd_addr *);
108 static uint8_t		 prefixlen_classful(in_addr_t);
109 static void		 get_rtaddrs(int, struct sockaddr *,
110 			    struct sockaddr **);
111 static void		 if_change(unsigned short, int, struct if_data *,
112 		 	   struct sockaddr_dl *);
113 static void		 if_newaddr(unsigned short, struct sockaddr *,
114 			    struct sockaddr *, struct sockaddr *);
115 static void		 if_deladdr(unsigned short, struct sockaddr *,
116 			    struct sockaddr *, struct sockaddr *);
117 static void		 if_announce(void *);
118 static int		 send_rtmsg(int, int, struct kroute *, int);
119 static int		 send_rtmsg_v4(int fd, int, struct kroute *, int);
120 static int		 send_rtmsg_v6(int fd, int, struct kroute *, int);
121 static int		 fetchtable(void);
122 static int		 fetchifs(void);
123 static int		 dispatch_rtmsg(void);
124 static int		 rtmsg_process(char *, size_t);
125 static int		 rtmsg_process_route(struct rt_msghdr *,
126 			    struct sockaddr *[RTAX_MAX]);
127 static int		 kmpw_install(const char *, struct kpw *);
128 static int		 kmpw_uninstall(const char *);
129 
/* Emit the red-black tree code for the route and interface trees. */
130 RB_GENERATE(kroute_tree, kroute_prefix, entry, kroute_compare)
131 RB_GENERATE(kif_tree, kif_node, entry, kif_compare)
132 
/* Roots of the route (krt) and interface (kit) trees. */
133 static struct kroute_tree	 krt = RB_INITIALIZER(&krt);
134 static struct kif_tree		 kit = RB_INITIALIZER(&kit);
135 
/*
 * Populate the interface tree through fetchifs().
 * Returns 0 on success and -1 when fetchifs() fails.
 */
int
kif_init(void)
{
	return ((fetchifs() == -1) ? -1 : 0);
}
144 
145 int
146 kr_init(int fs)
147 {
148 	int		opt = 0, rcvbuf, default_rcvbuf;
149 	socklen_t	optlen;
150 	unsigned int	rtfilter;
151 
152 	kr_state.fib_sync = fs;
153 
154 	if ((kr_state.fd = socket(AF_ROUTE,
155 	    SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK, 0)) == -1) {
156 		log_warn("%s: socket", __func__);
157 		return (-1);
158 	}
159 
160 	/* not interested in my own messages */
161 	if (setsockopt(kr_state.fd, SOL_SOCKET, SO_USELOOPBACK,
162 	    &opt, sizeof(opt)) == -1)
163 		log_warn("%s: setsockopt(SO_USELOOPBACK)", __func__);
164 
165 	/* filter out unwanted messages */
166 	rtfilter = ROUTE_FILTER(RTM_ADD) | ROUTE_FILTER(RTM_GET) |
167 	    ROUTE_FILTER(RTM_CHANGE) | ROUTE_FILTER(RTM_DELETE) |
168 	    ROUTE_FILTER(RTM_IFINFO) | ROUTE_FILTER(RTM_NEWADDR) |
169 	    ROUTE_FILTER(RTM_DELADDR) | ROUTE_FILTER(RTM_IFANNOUNCE);
170 
171 	if (setsockopt(kr_state.fd, PF_ROUTE, ROUTE_MSGFILTER,
172 	    &rtfilter, sizeof(rtfilter)) == -1)
173 		log_warn("%s: setsockopt(ROUTE_MSGFILTER)", __func__);
174 
175 	/* grow receive buffer, don't wanna miss messages */
176 	optlen = sizeof(default_rcvbuf);
177 	if (getsockopt(kr_state.fd, SOL_SOCKET, SO_RCVBUF,
178 	    &default_rcvbuf, &optlen) == -1)
179 		log_warn("%s: getsockopt SOL_SOCKET SO_RCVBUF", __func__);
180 	else
181 		for (rcvbuf = MAX_RTSOCK_BUF;
182 		    rcvbuf > default_rcvbuf &&
183 		    setsockopt(kr_state.fd, SOL_SOCKET, SO_RCVBUF,
184 		    &rcvbuf, sizeof(rcvbuf)) == -1 && errno == ENOBUFS;
185 		    rcvbuf /= 2)
186 			;	/* nothing */
187 
188 	kr_state.pid = getpid();
189 	kr_state.rtseq = 1;
190 
191 	if (fetchtable() == -1)
192 		return (-1);
193 
194 	event_set(&kr_state.ev, kr_state.fd, EV_READ | EV_PERSIST,
195 	    kr_dispatch_msg, NULL);
196 	event_add(&kr_state.ev, NULL);
197 
198 	if ((kr_state.ioctl_fd = socket(AF_INET,
199 	    SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0)) == -1) {
200 		log_warn("%s: ioctl socket", __func__);
201 		return (-1);
202 	}
203 
204 	return (0);
205 }
206 
207 void
208 kif_redistribute(const char *ifname)
209 {
210 	struct kif_node		*kif;
211 	struct kif_addr		*ka;
212 
213 	RB_FOREACH(kif, kif_tree, &kit) {
214 		if (ifname && strcmp(kif->k.ifname, ifname) != 0)
215 			continue;
216 
217 		TAILQ_FOREACH(ka, &kif->addrs, entry)
218 			main_imsg_compose_ldpe(IMSG_NEWADDR, 0, &ka->a,
219 			    sizeof(ka->a));
220 	}
221 }
222 
223 int
224 kr_change(struct kroute *kr)
225 {
226 	struct kroute_prefix	*kp;
227 	struct kroute_priority	*kprio;
228 	struct kroute_node	*kn;
229 	int			 action = RTM_ADD;
230 
231 	kp = kroute_find_prefix(kr->af, &kr->prefix, kr->prefixlen);
232 	if (kp == NULL)
233 		goto miss;
234 
235 	kprio = kroute_find_prio(kp, kr->priority);
236 	if (kprio == NULL)
237 		goto miss;
238 
239 	kn = kroute_find_gw(kprio, &kr->nexthop);
240 	if (kn == NULL)
241 		goto miss;
242 
243 	if (kn->r.flags & F_LDPD_INSERTED)
244 		action = RTM_CHANGE;
245 
246 	kn->r.local_label = kr->local_label;
247 	kn->r.remote_label = kr->remote_label;
248 	kn->r.flags = kn->r.flags | F_LDPD_INSERTED;
249 
250 	/* send update */
251 	if (send_rtmsg(kr_state.fd, action, &kn->r, AF_MPLS) == -1)
252 		return (-1);
253 
254 	if (ldp_addrisset(kn->r.af, &kn->r.nexthop) &&
255 	    kn->r.remote_label != NO_LABEL) {
256 		if (send_rtmsg(kr_state.fd, RTM_CHANGE, &kn->r, AF_INET) == -1)
257 			return (-1);
258 	}
259 
260 	return (0);
261 
262 miss:
263 	log_warnx("%s: lost FEC %s/%d nexthop %s", __func__,
264 	    log_addr(kr->af, &kr->prefix), kr->prefixlen,
265 	    log_addr(kr->af, &kr->nexthop));
266 	return (-1);
267 }
268 
269 int
270 kr_delete(struct kroute *kr)
271 {
272 	struct kroute_prefix	*kp;
273 	struct kroute_priority	*kprio;
274 	struct kroute_node	*kn;
275 	int			 update = 0;
276 
277 	kp = kroute_find_prefix(kr->af, &kr->prefix, kr->prefixlen);
278 	if (kp == NULL)
279 		return (0);
280 	kprio = kroute_find_prio(kp, kr->priority);
281 	if (kprio == NULL)
282 		return (0);
283 	kn = kroute_find_gw(kprio, &kr->nexthop);
284 	if (kn == NULL)
285 		return (0);
286 
287 	if (!(kn->r.flags & F_LDPD_INSERTED))
288 		return (0);
289 	if (ldp_addrisset(kn->r.af, &kn->r.nexthop) &&
290 	    kn->r.remote_label != NO_LABEL)
291 		update = 1;
292 
293 	/* kill MPLS LSP */
294 	if (send_rtmsg(kr_state.fd, RTM_DELETE, &kn->r, AF_MPLS) == -1)
295 		return (-1);
296 
297 	kn->r.flags &= ~F_LDPD_INSERTED;
298 	kn->r.local_label = NO_LABEL;
299 	kn->r.remote_label = NO_LABEL;
300 
301 	if (update &&
302 	    send_rtmsg(kr_state.fd, RTM_CHANGE, &kn->r, AF_INET) == -1)
303 		return (-1);
304 
305 	return (0);
306 }
307 
/*
 * Tear down the kernel interface: decouple from the FIB first, then drop
 * the route tree and the interface tree.
 */
void
kr_shutdown(void)
{
	kr_fib_decouple();

	kroute_clear();
	kif_clear();
}
315 
316 void
317 kr_fib_couple(void)
318 {
319 	struct kroute_prefix	*kp;
320 	struct kroute_priority	*kprio;
321 	struct kroute_node	*kn;
322 	struct kif_node		*kif;
323 
324 	if (kr_state.fib_sync == 1)	/* already coupled */
325 		return;
326 
327 	kr_state.fib_sync = 1;
328 
329 	RB_FOREACH(kp, kroute_tree, &krt) {
330 		kprio = TAILQ_FIRST(&kp->priorities);
331 		if (kprio == NULL)
332 			continue;
333 
334 		TAILQ_FOREACH(kn, &kprio->nexthops, entry) {
335 			if (!(kn->r.flags & F_LDPD_INSERTED))
336 				continue;
337 
338 			send_rtmsg(kr_state.fd, RTM_ADD, &kn->r, AF_MPLS);
339 
340 			if (ldp_addrisset(kn->r.af, &kn->r.nexthop) &&
341 			    kn->r.remote_label != NO_LABEL) {
342 				send_rtmsg(kr_state.fd, RTM_CHANGE,
343 				    &kn->r, AF_INET);
344 			}
345 		}
346 	}
347 
348 	RB_FOREACH(kif, kif_tree, &kit)
349 		if (kif->kpw)
350 			kmpw_install(kif->k.ifname, kif->kpw);
351 
352 	log_info("kernel routing table coupled");
353 }
354 
355 void
356 kr_fib_decouple(void)
357 {
358 	struct kroute_prefix	*kp;
359 	struct kroute_priority	*kprio;
360 	struct kroute_node	*kn;
361 	uint32_t		 rl;
362 	struct kif_node		*kif;
363 
364 	if (kr_state.fib_sync == 0)	/* already decoupled */
365 		return;
366 
367 	RB_FOREACH(kp, kroute_tree, &krt) {
368 		kprio = TAILQ_FIRST(&kp->priorities);
369 		if (kprio == NULL)
370 			continue;
371 
372 		TAILQ_FOREACH(kn, &kprio->nexthops, entry) {
373 			if (!(kn->r.flags & F_LDPD_INSERTED))
374 				continue;
375 
376 			send_rtmsg(kr_state.fd, RTM_DELETE,
377 			    &kn->r, AF_MPLS);
378 
379 			if (ldp_addrisset(kn->r.af, &kn->r.nexthop) &&
380 			    kn->r.remote_label != NO_LABEL) {
381 				rl = kn->r.remote_label;
382 				kn->r.remote_label = NO_LABEL;
383 				send_rtmsg(kr_state.fd, RTM_CHANGE,
384 				    &kn->r, AF_INET);
385 				kn->r.remote_label = rl;
386 			}
387 		}
388 	}
389 
390 	RB_FOREACH(kif, kif_tree, &kit)
391 		if (kif->kpw)
392 			kmpw_uninstall(kif->k.ifname);
393 
394 	kr_state.fib_sync = 0;
395 	log_info("kernel routing table decoupled");
396 }
397 
398 void
399 kr_change_egress_label(int af, int was_implicit)
400 {
401 	struct kroute_prefix	*kp;
402 	struct kroute_priority	*kprio;
403 	struct kroute_node	*kn;
404 
405 	RB_FOREACH(kp, kroute_tree, &krt) {
406 		if (kp->af != af)
407 			continue;
408 
409 		TAILQ_FOREACH(kprio, &kp->priorities, entry) {
410 			TAILQ_FOREACH(kn, &kprio->nexthops, entry) {
411 				if (kn->r.local_label > MPLS_LABEL_RESERVED_MAX)
412 					continue;
413 
414 				if (!was_implicit) {
415 					kn->r.local_label = MPLS_LABEL_IMPLNULL;
416 					continue;
417 				}
418 
419 				switch (kn->r.af) {
420 				case AF_INET:
421 					kn->r.local_label = MPLS_LABEL_IPV4NULL;
422 					break;
423 				case AF_INET6:
424 					kn->r.local_label = MPLS_LABEL_IPV6NULL;
425 					break;
426 				default:
427 					break;
428 				}
429 			}
430 		}
431 	}
432 }
433 
/*
 * Event callback for the routing socket: process pending messages and
 * leave the event loop if dispatch_rtmsg() reports an error.
 * None of the arguments are used.
 */
/* ARGSUSED */
static void
kr_dispatch_msg(int fd, short event, void *bula)
{
	int	rv;

	rv = dispatch_rtmsg();
	if (rv == -1)
		event_loopexit(NULL);
}
441 
442 void
443 kr_show_route(struct imsg *imsg)
444 {
445 	struct kroute_prefix	*kp;
446 	struct kroute_priority	*kprio;
447 	struct kroute_node	*kn;
448 	int			 flags;
449 	struct kroute		 kr;
450 
451 	switch (imsg->hdr.type) {
452 	case IMSG_CTL_KROUTE:
453 		if (imsg->hdr.len != IMSG_HEADER_SIZE + sizeof(flags)) {
454 			log_warnx("%s: wrong imsg len", __func__);
455 			return;
456 		}
457 		memcpy(&flags, imsg->data, sizeof(flags));
458 
459 		RB_FOREACH(kp, kroute_tree, &krt)
460 			TAILQ_FOREACH(kprio, &kp->priorities, entry)
461 				TAILQ_FOREACH(kn, &kprio->nexthops, entry) {
462 					if (flags && !(kn->r.flags & flags))
463 						continue;
464 
465 					main_imsg_compose_ldpe(IMSG_CTL_KROUTE,
466 					    imsg->hdr.pid, &kn->r,
467 					    sizeof(kn->r));
468 				}
469 		break;
470 	case IMSG_CTL_KROUTE_ADDR:
471 		if (imsg->hdr.len != IMSG_HEADER_SIZE + sizeof(kr)) {
472 			log_warnx("%s: wrong imsg len", __func__);
473 			return;
474 		}
475 		memcpy(&kr, imsg->data, sizeof(kr));
476 
477 		kprio = kroute_match(kr.af, &kr.prefix);
478 		if (kprio == NULL)
479 			break;
480 
481 		TAILQ_FOREACH(kn, &kprio->nexthops, entry)
482 			main_imsg_compose_ldpe(IMSG_CTL_KROUTE, imsg->hdr.pid,
483 			    &kn->r, sizeof(kn->r));
484 		break;
485 	default:
486 		log_debug("%s: error handling imsg", __func__);
487 		break;
488 	}
489 	main_imsg_compose_ldpe(IMSG_CTL_END, imsg->hdr.pid, NULL, 0);
490 }
491 
492 void
493 kr_ifinfo(char *ifname, pid_t pid)
494 {
495 	struct kif_node	*kif;
496 
497 	RB_FOREACH(kif, kif_tree, &kit)
498 		if (ifname == NULL || !strcmp(ifname, kif->k.ifname)) {
499 			main_imsg_compose_ldpe(IMSG_CTL_IFINFO,
500 			    pid, &kif->k, sizeof(kif->k));
501 		}
502 
503 	main_imsg_compose_ldpe(IMSG_CTL_END, pid, NULL, 0);
504 }
505 
506 static void
507 kr_redist_remove(struct kroute *kr)
508 {
509 	/* was the route redistributed? */
510 	if ((kr->flags & F_REDISTRIBUTED) == 0)
511 		return;
512 
513 	/* remove redistributed flag */
514 	kr->flags &= ~F_REDISTRIBUTED;
515 	main_imsg_compose_lde(IMSG_NETWORK_DEL, 0, kr, sizeof(*kr));
516 }
517 
518 static int
519 kr_redist_eval(struct kroute *kr)
520 {
521 	/* was the route redistributed? */
522 	if (kr->flags & F_REDISTRIBUTED)
523 		goto dont_redistribute;
524 
525 	/* Dynamic routes are not redistributable. */
526 	if (kr->flags & F_DYNAMIC)
527 		goto dont_redistribute;
528 
529 	/* filter-out non-redistributable addresses */
530 	if (bad_addr(kr->af, &kr->prefix) ||
531 	    (kr->af == AF_INET6 && IN6_IS_SCOPE_EMBED(&kr->prefix.v6)))
532 		goto dont_redistribute;
533 
534 	/* do not redistribute the default route */
535 	if (kr->prefixlen == 0)
536 		goto dont_redistribute;
537 
538 	/*
539 	 * Consider networks with nexthop loopback as not redistributable
540 	 * unless it is a reject or blackhole route.
541 	 */
542 	switch (kr->af) {
543 	case AF_INET:
544 		if (kr->nexthop.v4.s_addr == htonl(INADDR_LOOPBACK) &&
545 		    !(kr->flags & (F_BLACKHOLE|F_REJECT)))
546 			goto dont_redistribute;
547 		break;
548 	case AF_INET6:
549 		if (IN6_IS_ADDR_LOOPBACK(&kr->nexthop.v6) &&
550 		    !(kr->flags & (F_BLACKHOLE|F_REJECT)))
551 			goto dont_redistribute;
552 		break;
553 	default:
554 		log_debug("%s: unexpected address-family", __func__);
555 		break;
556 	}
557 
558 	/* prefix should be redistributed */
559 	kr->flags |= F_REDISTRIBUTED;
560 	main_imsg_compose_lde(IMSG_NETWORK_ADD, 0, kr, sizeof(*kr));
561 	return (1);
562 
563 dont_redistribute:
564 	return (0);
565 }
566 
567 static void
568 kr_redistribute(struct kroute_prefix *kp)
569 {
570 	struct kroute_priority	*kprio;
571 	struct kroute_node	*kn;
572 
573 	TAILQ_FOREACH_REVERSE(kprio, &kp->priorities, plist, entry) {
574 		if (kprio == TAILQ_FIRST(&kp->priorities)) {
575 			TAILQ_FOREACH(kn, &kprio->nexthops, entry)
576 				kr_redist_eval(&kn->r);
577 		} else {
578 			TAILQ_FOREACH(kn, &kprio->nexthops, entry)
579 				kr_redist_remove(&kn->r);
580 		}
581 	}
582 }
583 
584 /* rb-tree compare */
585 static __inline int
586 kroute_compare(struct kroute_prefix *a, struct kroute_prefix *b)
587 {
588 	int		 addrcmp;
589 
590 	if (a->af < b->af)
591 		return (-1);
592 	if (a->af > b->af)
593 		return (1);
594 
595 	addrcmp = ldp_addrcmp(a->af, &a->prefix, &b->prefix);
596 	if (addrcmp != 0)
597 		return (addrcmp);
598 
599 	if (a->prefixlen < b->prefixlen)
600 		return (-1);
601 	if (a->prefixlen > b->prefixlen)
602 		return (1);
603 
604 	return (0);
605 }
606 
607 /* tree management */
608 static struct kroute_prefix *
609 kroute_find_prefix(int af, union ldpd_addr *prefix, uint8_t prefixlen)
610 {
611 	struct kroute_prefix	 s;
612 
613 	s.af = af;
614 	s.prefix = *prefix;
615 	s.prefixlen = prefixlen;
616 
617 	return (RB_FIND(kroute_tree, &krt, &s));
618 }
619 
620 static struct kroute_priority *
621 kroute_find_prio(struct kroute_prefix *kp, uint8_t prio)
622 {
623 	struct kroute_priority	*kprio;
624 
625 	/* RTP_ANY here picks the lowest priority node */
626 	if (prio == RTP_ANY)
627 		return (TAILQ_FIRST(&kp->priorities));
628 
629 	TAILQ_FOREACH(kprio, &kp->priorities, entry)
630 		if (kprio->priority == prio)
631 			return (kprio);
632 
633 	return (NULL);
634 }
635 
636 static struct kroute_node *
637 kroute_find_gw(struct kroute_priority *kprio, union ldpd_addr *nh)
638 {
639 	struct kroute_node	*kn;
640 
641 	TAILQ_FOREACH(kn, &kprio->nexthops, entry)
642 		if (ldp_addrcmp(kprio->kp->af, &kn->r.nexthop, nh) == 0)
643 			return (kn);
644 
645 	return (NULL);
646 }
647 
648 static int
649 kroute_insert(struct kroute *kr)
650 {
651 	struct kroute_prefix	*kp;
652 	struct kroute_priority	*kprio, *tmp;
653 	struct kroute_node	*kn;
654 
655 	kp = kroute_find_prefix(kr->af, &kr->prefix, kr->prefixlen);
656 	if (kp == NULL) {
657 		kp = calloc(1, sizeof((*kp)));
658 		if (kp == NULL)
659 			fatal(__func__);
660 		kp->af = kr->af;
661 		kp->prefix = kr->prefix;
662 		kp->prefixlen = kr->prefixlen;
663 		TAILQ_INIT(&kp->priorities);
664 		RB_INSERT(kroute_tree, &krt, kp);
665 	}
666 
667 	kprio = kroute_find_prio(kp, kr->priority);
668 	if (kprio == NULL) {
669 		kprio = calloc(1, sizeof(*kprio));
670 		if (kprio == NULL)
671 			fatal(__func__);
672 		kprio->kp = kp;
673 		kprio->priority = kr->priority;
674 		TAILQ_INIT(&kprio->nexthops);
675 
676 		/* lower priorities first */
677 		TAILQ_FOREACH(tmp, &kp->priorities, entry)
678 			if (tmp->priority > kprio->priority)
679 				break;
680 		if (tmp)
681 			TAILQ_INSERT_BEFORE(tmp, kprio, entry);
682 		else
683 			TAILQ_INSERT_TAIL(&kp->priorities, kprio, entry);
684 	}
685 
686 	kn = kroute_find_gw(kprio, &kr->nexthop);
687 	if (kn == NULL) {
688 		kn = calloc(1, sizeof(*kn));
689 		if (kn == NULL)
690 			fatal(__func__);
691 		kn->kprio = kprio;
692 		kn->r = *kr;
693 		TAILQ_INSERT_TAIL(&kprio->nexthops, kn, entry);
694 	}
695 
696 	kr_redistribute(kp);
697 	return (0);
698 }
699 
700 static int
701 kroute_uninstall(struct kroute_node *kn)
702 {
703 	/* kill MPLS LSP if one was installed */
704 	if (kn->r.flags & F_LDPD_INSERTED)
705 		if (send_rtmsg(kr_state.fd, RTM_DELETE, &kn->r, AF_MPLS) ==
706 		    -1)
707 			return (-1);
708 
709 	return (0);
710 }
711 
712 static int
713 kroute_remove(struct kroute *kr)
714 {
715 	struct kroute_prefix	*kp;
716 	struct kroute_priority	*kprio;
717 	struct kroute_node	*kn;
718 
719 	kp = kroute_find_prefix(kr->af, &kr->prefix, kr->prefixlen);
720 	if (kp == NULL)
721 		goto notfound;
722 	kprio = kroute_find_prio(kp, kr->priority);
723 	if (kprio == NULL)
724 		goto notfound;
725 	kn = kroute_find_gw(kprio, &kr->nexthop);
726 	if (kn == NULL)
727 		goto notfound;
728 
729 	kr_redist_remove(&kn->r);
730 	kroute_uninstall(kn);
731 
732 	TAILQ_REMOVE(&kprio->nexthops, kn, entry);
733 	free(kn);
734 
735 	if (TAILQ_EMPTY(&kprio->nexthops)) {
736 		TAILQ_REMOVE(&kp->priorities, kprio, entry);
737 		free(kprio);
738 	}
739 
740 	if (TAILQ_EMPTY(&kp->priorities)) {
741 		if (RB_REMOVE(kroute_tree, &krt, kp) == NULL) {
742 			log_warnx("%s failed for %s/%u", __func__,
743 			    log_addr(kr->af, &kr->prefix), kp->prefixlen);
744 			return (-1);
745 		}
746 		free(kp);
747 	} else
748 		kr_redistribute(kp);
749 
750 	return (0);
751 
752 notfound:
753 	log_warnx("%s failed to find %s/%u", __func__,
754 	    log_addr(kr->af, &kr->prefix), kr->prefixlen);
755 	return (-1);
756 }
757 
758 static void
759 kroute_clear(void)
760 {
761 	struct kroute_prefix	*kp;
762 	struct kroute_priority	*kprio;
763 	struct kroute_node	*kn;
764 
765 	while ((kp = RB_MIN(kroute_tree, &krt)) != NULL) {
766 		while ((kprio = TAILQ_FIRST(&kp->priorities)) != NULL) {
767 			while ((kn = TAILQ_FIRST(&kprio->nexthops)) != NULL) {
768 				kr_redist_remove(&kn->r);
769 				kroute_uninstall(kn);
770 				TAILQ_REMOVE(&kprio->nexthops, kn, entry);
771 				free(kn);
772 			}
773 			TAILQ_REMOVE(&kp->priorities, kprio, entry);
774 			free(kprio);
775 		}
776 		RB_REMOVE(kroute_tree, &krt, kp);
777 		free(kp);
778 	}
779 }
780 
781 static __inline int
782 kif_compare(struct kif_node *a, struct kif_node *b)
783 {
784 	return (b->k.ifindex - a->k.ifindex);
785 }
786 
787 /* tree management */
788 static struct kif_node *
789 kif_find(unsigned short ifindex)
790 {
791 	struct kif_node	s;
792 
793 	memset(&s, 0, sizeof(s));
794 	s.k.ifindex = ifindex;
795 
796 	return (RB_FIND(kif_tree, &kit, &s));
797 }
798 
799 struct kif *
800 kif_findname(char *ifname)
801 {
802 	struct kif_node	*kif;
803 
804 	RB_FOREACH(kif, kif_tree, &kit)
805 		if (!strcmp(ifname, kif->k.ifname))
806 			return (&kif->k);
807 
808 	return (NULL);
809 }
810 
811 static struct kif_node *
812 kif_insert(unsigned short ifindex)
813 {
814 	struct kif_node	*kif;
815 
816 	if ((kif = calloc(1, sizeof(struct kif_node))) == NULL)
817 		return (NULL);
818 
819 	kif->k.ifindex = ifindex;
820 	TAILQ_INIT(&kif->addrs);
821 
822 	if (RB_INSERT(kif_tree, &kit, kif) != NULL)
823 		fatalx("kif_insert: RB_INSERT");
824 
825 	return (kif);
826 }
827 
828 static int
829 kif_remove(struct kif_node *kif)
830 {
831 	struct kif_addr	*ka;
832 
833 	if (RB_REMOVE(kif_tree, &kit, kif) == NULL) {
834 		log_warnx("RB_REMOVE(kif_tree, &kit, kif)");
835 		return (-1);
836 	}
837 
838 	while ((ka = TAILQ_FIRST(&kif->addrs)) != NULL) {
839 		main_imsg_compose_ldpe(IMSG_DELADDR, 0, &ka->a, sizeof(ka->a));
840 		TAILQ_REMOVE(&kif->addrs, ka, entry);
841 		free(ka);
842 	}
843 	free(kif);
844 	return (0);
845 }
846 
847 void
848 kif_clear(void)
849 {
850 	struct kif_node	*kif;
851 
852 	while ((kif = RB_MIN(kif_tree, &kit)) != NULL)
853 		kif_remove(kif);
854 }
855 
856 static struct kif_node *
857 kif_update(unsigned short ifindex, int flags, struct if_data *ifd,
858     struct sockaddr_dl *sdl, int *link_old)
859 {
860 	struct kif_node		*kif;
861 
862 	if ((kif = kif_find(ifindex)) == NULL) {
863 		if ((kif = kif_insert(ifindex)) == NULL)
864 			return (NULL);
865 	} else
866 		*link_old = (kif->k.flags & IFF_UP) &&
867 		    LINK_STATE_IS_UP(kif->k.link_state);
868 
869 	kif->k.flags = flags;
870 	kif->k.link_state = ifd->ifi_link_state;
871 	kif->k.if_type = ifd->ifi_type;
872 	kif->k.baudrate = ifd->ifi_baudrate;
873 	kif->k.mtu = ifd->ifi_mtu;
874 
875 	if (sdl && sdl->sdl_family == AF_LINK) {
876 		if (sdl->sdl_nlen >= sizeof(kif->k.ifname))
877 			memcpy(kif->k.ifname, sdl->sdl_data,
878 			    sizeof(kif->k.ifname) - 1);
879 		else if (sdl->sdl_nlen > 0)
880 			memcpy(kif->k.ifname, sdl->sdl_data,
881 			    sdl->sdl_nlen);
882 		/* string already terminated via calloc() */
883 	}
884 
885 	return (kif);
886 }
887 
888 static struct kroute_priority *
889 kroute_match(int af, union ldpd_addr *key)
890 {
891 	int			 i, maxprefixlen;
892 	struct kroute_prefix	*kp;
893 	struct kroute_priority	*kprio;
894 	union ldpd_addr		 addr;
895 
896 	switch (af) {
897 	case AF_INET:
898 		maxprefixlen = 32;
899 		break;
900 	case AF_INET6:
901 		maxprefixlen = 128;
902 		break;
903 	default:
904 		log_warnx("%s: unknown af", __func__);
905 		return (NULL);
906 	}
907 
908 	for (i = maxprefixlen; i >= 0; i--) {
909 		ldp_applymask(af, &addr, key, i);
910 
911 		kp = kroute_find_prefix(af, &addr, i);
912 		if (kp == NULL)
913 			continue;
914 
915 		kprio = kroute_find_prio(kp, RTP_ANY);
916 		if (kprio != NULL)
917 			return (kprio);
918 	}
919 
920 	return (NULL);
921 }
922 
923 /* misc */
/*
 * Return the classful prefix length of ina by comparing its numeric value
 * against the class boundaries: 32 for class E, 4 for class D, 24 for
 * class C, 16 for class B and 8 for class A.
 */
static uint8_t
prefixlen_classful(in_addr_t ina)
{
	/* it hurt to write this. */

	if (ina >= 0xf0000000U)		/* class E */
		return (32);
	if (ina >= 0xe0000000U)		/* class D */
		return (4);
	if (ina >= 0xc0000000U)		/* class C */
		return (24);
	if (ina >= 0x80000000U)		/* class B */
		return (16);

	return (8);			/* class A */
}
940 
/*
 * Round a sockaddr length up to the next multiple of sizeof(long), the
 * alignment used for addresses packed into routing socket messages.
 * A zero-length sockaddr still occupies sizeof(long) bytes in the
 * message, so 0 must round up to sizeof(long) rather than stay 0.
 */
#define	ROUNDUP(a)	\
    ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
943 
944 static void
945 get_rtaddrs(int addrs, struct sockaddr *sa, struct sockaddr **rti_info)
946 {
947 	int	i;
948 
949 	for (i = 0; i < RTAX_MAX; i++) {
950 		if (addrs & (1 << i)) {
951 			rti_info[i] = sa;
952 			sa = (struct sockaddr *)((char *)(sa) +
953 			    ROUNDUP(sa->sa_len));
954 		} else
955 			rti_info[i] = NULL;
956 	}
957 }
958 
959 static void
960 if_change(unsigned short ifindex, int flags, struct if_data *ifd,
961     struct sockaddr_dl *sdl)
962 {
963 	struct kif_node		*kif;
964 	struct kif_addr		*ka;
965 	int			 link_old = 0, link_new;
966 
967 	kif = kif_update(ifindex, flags, ifd, sdl, &link_old);
968 	if (!kif) {
969 		log_warn("%s: kif_update(%u)", __func__, ifindex);
970 		return;
971 	}
972 	link_new = (kif->k.flags & IFF_UP) &&
973 	    LINK_STATE_IS_UP(kif->k.link_state);
974 
975 	if (link_new == link_old)
976 		return;
977 
978 	main_imsg_compose_ldpe(IMSG_IFSTATUS, 0, &kif->k, sizeof(struct kif));
979 	if (link_new) {
980 		TAILQ_FOREACH(ka, &kif->addrs, entry)
981 			main_imsg_compose_ldpe(IMSG_NEWADDR, 0, &ka->a,
982 			    sizeof(ka->a));
983 	} else {
984 		TAILQ_FOREACH(ka, &kif->addrs, entry)
985 			main_imsg_compose_ldpe(IMSG_DELADDR, 0, &ka->a,
986 			    sizeof(ka->a));
987 	}
988 }
989 
990 static void
991 if_newaddr(unsigned short ifindex, struct sockaddr *ifa, struct sockaddr *mask,
992     struct sockaddr *brd)
993 {
994 	struct kif_node		*kif;
995 	struct sockaddr_in	*ifa4, *mask4, *brd4;
996 	struct sockaddr_in6	*ifa6, *mask6, *brd6;
997 	struct kif_addr		*ka;
998 
999 	if (ifa == NULL)
1000 		return;
1001 	if ((kif = kif_find(ifindex)) == NULL) {
1002 		log_warnx("%s: corresponding if %d not found", __func__,
1003 		    ifindex);
1004 		return;
1005 	}
1006 
1007 	switch (ifa->sa_family) {
1008 	case AF_INET:
1009 		ifa4 = (struct sockaddr_in *) ifa;
1010 		mask4 = (struct sockaddr_in *) mask;
1011 		brd4 = (struct sockaddr_in *) brd;
1012 
1013 		/* filter out unwanted addresses */
1014 		if (bad_addr_v4(ifa4->sin_addr))
1015 			return;
1016 
1017 		if ((ka = calloc(1, sizeof(struct kif_addr))) == NULL)
1018 			fatal("if_newaddr");
1019 		ka->a.addr.v4 = ifa4->sin_addr;
1020 		if (mask4)
1021 			ka->a.prefixlen =
1022 			    mask2prefixlen(mask4->sin_addr.s_addr);
1023 		if (brd4)
1024 			ka->a.dstbrd.v4 = brd4->sin_addr;
1025 		break;
1026 	case AF_INET6:
1027 		ifa6 = (struct sockaddr_in6 *) ifa;
1028 		mask6 = (struct sockaddr_in6 *) mask;
1029 		brd6 = (struct sockaddr_in6 *) brd;
1030 
1031 		/* We only care about link-local and global-scope. */
1032 		if (bad_addr_v6(&ifa6->sin6_addr))
1033 			return;
1034 
1035 		clearscope(&ifa6->sin6_addr);
1036 
1037 		if ((ka = calloc(1, sizeof(struct kif_addr))) == NULL)
1038 			fatal("if_newaddr");
1039 		ka->a.addr.v6 = ifa6->sin6_addr;
1040 		if (mask6)
1041 			ka->a.prefixlen = mask2prefixlen6(mask6);
1042 		if (brd6)
1043 			ka->a.dstbrd.v6 = brd6->sin6_addr;
1044 		break;
1045 	default:
1046 		return;
1047 	}
1048 
1049 	ka->a.ifindex = ifindex;
1050 	ka->a.af = ifa->sa_family;
1051 	TAILQ_INSERT_TAIL(&kif->addrs, ka, entry);
1052 
1053 	/* notify ldpe about new address */
1054 	main_imsg_compose_ldpe(IMSG_NEWADDR, 0, &ka->a, sizeof(ka->a));
1055 }
1056 
1057 static void
1058 if_deladdr(unsigned short ifindex, struct sockaddr *ifa, struct sockaddr *mask,
1059     struct sockaddr *brd)
1060 {
1061 	struct kif_node		*kif;
1062 	struct sockaddr_in	*ifa4, *mask4, *brd4;
1063 	struct sockaddr_in6	*ifa6, *mask6, *brd6;
1064 	struct kaddr		 k;
1065 	struct kif_addr		*ka, *nka;
1066 
1067 	if (ifa == NULL)
1068 		return;
1069 	if ((kif = kif_find(ifindex)) == NULL) {
1070 		log_warnx("%s: corresponding if %d not found", __func__,
1071 		    ifindex);
1072 		return;
1073 	}
1074 
1075 	memset(&k, 0, sizeof(k));
1076 	k.af = ifa->sa_family;
1077 	switch (ifa->sa_family) {
1078 	case AF_INET:
1079 		ifa4 = (struct sockaddr_in *) ifa;
1080 		mask4 = (struct sockaddr_in *) mask;
1081 		brd4 = (struct sockaddr_in *) brd;
1082 
1083 		/* filter out unwanted addresses */
1084 		if (bad_addr_v4(ifa4->sin_addr))
1085 			return;
1086 
1087 		k.addr.v4 = ifa4->sin_addr;
1088 		if (mask4)
1089 			k.prefixlen = mask2prefixlen(mask4->sin_addr.s_addr);
1090 		if (brd4)
1091 			k.dstbrd.v4 = brd4->sin_addr;
1092 		break;
1093 	case AF_INET6:
1094 		ifa6 = (struct sockaddr_in6 *) ifa;
1095 		mask6 = (struct sockaddr_in6 *) mask;
1096 		brd6 = (struct sockaddr_in6 *) brd;
1097 
1098 		/* We only care about link-local and global-scope. */
1099 		if (bad_addr_v6(&ifa6->sin6_addr))
1100 			return;
1101 
1102 		clearscope(&ifa6->sin6_addr);
1103 
1104 		k.addr.v6 = ifa6->sin6_addr;
1105 		if (mask6)
1106 			k.prefixlen = mask2prefixlen6(mask6);
1107 		if (brd6)
1108 			k.dstbrd.v6 = brd6->sin6_addr;
1109 		break;
1110 	default:
1111 		return;
1112 	}
1113 
1114 	for (ka = TAILQ_FIRST(&kif->addrs); ka != NULL; ka = nka) {
1115 		nka = TAILQ_NEXT(ka, entry);
1116 
1117 		if (ka->a.af != k.af ||
1118 		    ka->a.prefixlen != k.prefixlen ||
1119 		    ldp_addrcmp(ka->a.af, &ka->a.addr, &k.addr))
1120 			continue;
1121 
1122 		/* notify ldpe about removed address */
1123 		main_imsg_compose_ldpe(IMSG_DELADDR, 0, &ka->a, sizeof(ka->a));
1124 		TAILQ_REMOVE(&kif->addrs, ka, entry);
1125 		free(ka);
1126 		return;
1127 	}
1128 }
1129 
1130 static void
1131 if_announce(void *msg)
1132 {
1133 	struct if_announcemsghdr	*ifan;
1134 	struct kif_node			*kif;
1135 
1136 	ifan = msg;
1137 
1138 	switch (ifan->ifan_what) {
1139 	case IFAN_ARRIVAL:
1140 		kif = kif_insert(ifan->ifan_index);
1141 		if (kif)
1142 			strlcpy(kif->k.ifname, ifan->ifan_name,
1143 			    sizeof(kif->k.ifname));
1144 		break;
1145 	case IFAN_DEPARTURE:
1146 		kif = kif_find(ifan->ifan_index);
1147 		if (kif)
1148 			kif_remove(kif);
1149 		break;
1150 	}
1151 }
1152 
1153 /* rtsock */
1154 static int
1155 send_rtmsg(int fd, int action, struct kroute *kr, int family)
1156 {
1157 	switch (kr->af) {
1158 	case AF_INET:
1159 		return (send_rtmsg_v4(fd, action, kr, family));
1160 	case AF_INET6:
1161 		return (send_rtmsg_v6(fd, action, kr, family));
1162 	default:
1163 		fatalx("send_rtmsg: unknown af");
1164 	}
1165 }
1166 
1167 static int
1168 send_rtmsg_v4(int fd, int action, struct kroute *kr, int family)
1169 {
1170 	struct iovec		iov[5];
1171 	struct rt_msghdr	hdr;
1172 	struct sockaddr_mpls	label_in, label_out;
1173 	struct sockaddr_in	dst, mask, nexthop;
1174 	int			iovcnt = 0;
1175 
1176 	if (kr_state.fib_sync == 0)
1177 		return (0);
1178 
1179 	/*
1180 	 * Reserved labels (implicit and explicit NULL) should not be added
1181 	 * to the FIB.
1182 	 */
1183 	if (family == AF_MPLS && kr->local_label < MPLS_LABEL_RESERVED_MAX)
1184 		return (0);
1185 
1186 	/* initialize header */
1187 	memset(&hdr, 0, sizeof(hdr));
1188 	hdr.rtm_version = RTM_VERSION;
1189 
1190 	hdr.rtm_type = action;
1191 	hdr.rtm_flags = RTF_UP;
1192 	hdr.rtm_fmask = RTF_MPLS;
1193 	hdr.rtm_seq = kr_state.rtseq++;	/* overflow doesn't matter */
1194 	hdr.rtm_msglen = sizeof(hdr);
1195 	hdr.rtm_hdrlen = sizeof(struct rt_msghdr);
1196 	hdr.rtm_priority = kr->priority;
1197 	/* adjust iovec */
1198 	iov[iovcnt].iov_base = &hdr;
1199 	iov[iovcnt++].iov_len = sizeof(hdr);
1200 
1201 	if (family == AF_MPLS) {
1202 		memset(&label_in, 0, sizeof(label_in));
1203 		label_in.smpls_len = sizeof(label_in);
1204 		label_in.smpls_family = AF_MPLS;
1205 		label_in.smpls_label =
1206 		    htonl(kr->local_label << MPLS_LABEL_OFFSET);
1207 		/* adjust header */
1208 		hdr.rtm_flags |= RTF_MPLS | RTF_MPATH;
1209 		hdr.rtm_addrs |= RTA_DST;
1210 		hdr.rtm_msglen += sizeof(label_in);
1211 		/* adjust iovec */
1212 		iov[iovcnt].iov_base = &label_in;
1213 		iov[iovcnt++].iov_len = sizeof(label_in);
1214 	} else {
1215 		memset(&dst, 0, sizeof(dst));
1216 		dst.sin_len = sizeof(dst);
1217 		dst.sin_family = AF_INET;
1218 		dst.sin_addr = kr->prefix.v4;
1219 		/* adjust header */
1220 		hdr.rtm_addrs |= RTA_DST;
1221 		hdr.rtm_msglen += sizeof(dst);
1222 		/* adjust iovec */
1223 		iov[iovcnt].iov_base = &dst;
1224 		iov[iovcnt++].iov_len = sizeof(dst);
1225 	}
1226 
1227 	memset(&nexthop, 0, sizeof(nexthop));
1228 	nexthop.sin_len = sizeof(nexthop);
1229 	nexthop.sin_family = AF_INET;
1230 	nexthop.sin_addr = kr->nexthop.v4;
1231 	/* adjust header */
1232 	hdr.rtm_flags |= RTF_GATEWAY;
1233 	hdr.rtm_addrs |= RTA_GATEWAY;
1234 	hdr.rtm_msglen += sizeof(nexthop);
1235 	/* adjust iovec */
1236 	iov[iovcnt].iov_base = &nexthop;
1237 	iov[iovcnt++].iov_len = sizeof(nexthop);
1238 
1239 	if (family == AF_INET) {
1240 		memset(&mask, 0, sizeof(mask));
1241 		mask.sin_len = sizeof(mask);
1242 		mask.sin_family = AF_INET;
1243 		mask.sin_addr.s_addr = prefixlen2mask(kr->prefixlen);
1244 		/* adjust header */
1245 		hdr.rtm_addrs |= RTA_NETMASK;
1246 		hdr.rtm_msglen += sizeof(mask);
1247 		/* adjust iovec */
1248 		iov[iovcnt].iov_base = &mask;
1249 		iov[iovcnt++].iov_len = sizeof(mask);
1250 	}
1251 
1252 	/* If action is RTM_DELETE we have to get rid of MPLS infos */
1253 	if (kr->remote_label != NO_LABEL && action != RTM_DELETE) {
1254 		memset(&label_out, 0, sizeof(label_out));
1255 		label_out.smpls_len = sizeof(label_out);
1256 		label_out.smpls_family = AF_MPLS;
1257 		label_out.smpls_label =
1258 		    htonl(kr->remote_label << MPLS_LABEL_OFFSET);
1259 		/* adjust header */
1260 		hdr.rtm_addrs |= RTA_SRC;
1261 		hdr.rtm_flags |= RTF_MPLS;
1262 		hdr.rtm_msglen += sizeof(label_out);
1263 		/* adjust iovec */
1264 		iov[iovcnt].iov_base = &label_out;
1265 		iov[iovcnt++].iov_len = sizeof(label_out);
1266 
1267 		if (kr->remote_label == MPLS_LABEL_IMPLNULL) {
1268 			if (family == AF_MPLS)
1269 				hdr.rtm_mpls = MPLS_OP_POP;
1270 			else
1271 				return (0);
1272 		} else {
1273 			if (family == AF_MPLS)
1274 				hdr.rtm_mpls = MPLS_OP_SWAP;
1275 			else
1276 				hdr.rtm_mpls = MPLS_OP_PUSH;
1277 		}
1278 	}
1279 
1280 retry:
1281 	if (writev(fd, iov, iovcnt) == -1) {
1282 		if (errno == ESRCH) {
1283 			if (hdr.rtm_type == RTM_CHANGE && family == AF_MPLS) {
1284 				hdr.rtm_type = RTM_ADD;
1285 				goto retry;
1286 			} else if (hdr.rtm_type == RTM_DELETE) {
1287 				log_info("route %s/%u vanished before delete",
1288 				    inet_ntoa(kr->prefix.v4), kr->prefixlen);
1289 				return (-1);
1290 			}
1291 		}
1292 		log_warn("%s action %u, af %s, prefix %s/%u", __func__,
1293 		    hdr.rtm_type, af_name(family), inet_ntoa(kr->prefix.v4),
1294 		    kr->prefixlen);
1295 		return (-1);
1296 	}
1297 
1298 	return (0);
1299 }
1300 
/*
 * IPv6 counterpart of the IPv4 routing message sender.
 *
 * Currently a stub: no message is built or written, every argument is
 * ignored and 0 is returned.
 */
static int
send_rtmsg_v6(int fd, int action, struct kroute *kr, int family)
{
	(void)fd;
	(void)action;
	(void)kr;
	(void)family;

	return (0);
}
1306 
1307 static int
1308 fetchtable(void)
1309 {
1310 	size_t			 len;
1311 	int			 mib[7];
1312 	char			*buf;
1313 	int			 rv;
1314 
1315 	mib[0] = CTL_NET;
1316 	mib[1] = PF_ROUTE;
1317 	mib[2] = 0;
1318 	mib[3] = 0;
1319 	mib[4] = NET_RT_DUMP;
1320 	mib[5] = 0;
1321 	mib[6] = 0;	/* rtableid */
1322 
1323 	if (sysctl(mib, 7, NULL, &len, NULL, 0) == -1) {
1324 		log_warn("sysctl");
1325 		return (-1);
1326 	}
1327 	if ((buf = malloc(len)) == NULL) {
1328 		log_warn(__func__);
1329 		return (-1);
1330 	}
1331 	if (sysctl(mib, 7, buf, &len, NULL, 0) == -1) {
1332 		log_warn("sysctl");
1333 		free(buf);
1334 		return (-1);
1335 	}
1336 
1337 	rv = rtmsg_process(buf, len);
1338 	free(buf);
1339 
1340 	return (rv);
1341 }
1342 
1343 static int
1344 fetchifs(void)
1345 {
1346 	size_t			 len;
1347 	int			 mib[6];
1348 	char			*buf;
1349 	int			 rv;
1350 
1351 	mib[0] = CTL_NET;
1352 	mib[1] = PF_ROUTE;
1353 	mib[2] = 0;
1354 	mib[3] = 0;	/* wildcard */
1355 	mib[4] = NET_RT_IFLIST;
1356 	mib[5] = 0;
1357 
1358 	if (sysctl(mib, 6, NULL, &len, NULL, 0) == -1) {
1359 		log_warn("sysctl");
1360 		return (-1);
1361 	}
1362 	if ((buf = malloc(len)) == NULL) {
1363 		log_warn(__func__);
1364 		return (-1);
1365 	}
1366 	if (sysctl(mib, 6, buf, &len, NULL, 0) == -1) {
1367 		log_warn("sysctl");
1368 		free(buf);
1369 		return (-1);
1370 	}
1371 
1372 	rv = rtmsg_process(buf, len);
1373 	free(buf);
1374 
1375 	return (rv);
1376 }
1377 
1378 static int
1379 dispatch_rtmsg(void)
1380 {
1381 	char			 buf[RT_BUF_SIZE];
1382 	ssize_t			 n;
1383 
1384 	if ((n = read(kr_state.fd, &buf, sizeof(buf))) == -1) {
1385 		if (errno == EAGAIN || errno == EINTR)
1386 			return (0);
1387 		log_warn("%s: read error", __func__);
1388 		return (-1);
1389 	}
1390 
1391 	if (n == 0) {
1392 		log_warnx("routing socket closed");
1393 		return (-1);
1394 	}
1395 
1396 	return (rtmsg_process(buf, n));
1397 }
1398 
1399 static int
1400 rtmsg_process(char *buf, size_t len)
1401 {
1402 	struct rt_msghdr	*rtm;
1403 	struct if_msghdr	 ifm;
1404 	struct ifa_msghdr	*ifam;
1405 	struct sockaddr		*sa, *rti_info[RTAX_MAX];
1406 	size_t			 offset;
1407 	char			*next;
1408 
1409 	for (offset = 0; offset < len; offset += rtm->rtm_msglen) {
1410 		next = buf + offset;
1411 		rtm = (struct rt_msghdr *)next;
1412 		if (len < offset + sizeof(unsigned short) ||
1413 		    len < offset + rtm->rtm_msglen)
1414 			fatalx("rtmsg_process: partial rtm in buffer");
1415 		if (rtm->rtm_version != RTM_VERSION)
1416 			continue;
1417 		log_rtmsg(rtm->rtm_type);
1418 
1419 		sa = (struct sockaddr *)(next + rtm->rtm_hdrlen);
1420 		get_rtaddrs(rtm->rtm_addrs, sa, rti_info);
1421 
1422 		switch (rtm->rtm_type) {
1423 		case RTM_ADD:
1424 		case RTM_GET:
1425 		case RTM_CHANGE:
1426 		case RTM_DELETE:
1427 			if (rtm->rtm_errno)		/* failed attempts... */
1428 				continue;
1429 
1430 			if (rtm->rtm_tableid != 0)
1431 				continue;
1432 
1433 			if (rtm->rtm_type == RTM_GET &&
1434 			    rtm->rtm_pid != kr_state.pid)
1435 				continue;
1436 
1437 			/* Skip ARP/ND cache and broadcast routes. */
1438 			if (rtm->rtm_flags & (RTF_LLINFO|RTF_BROADCAST))
1439 				continue;
1440 
1441 			/* LDP should follow the IGP and ignore BGP routes */
1442 			if (rtm->rtm_priority == RTP_BGP)
1443 				continue;
1444 
1445 			if (rtmsg_process_route(rtm, rti_info) == -1)
1446 				return (-1);
1447 		}
1448 
1449 		switch (rtm->rtm_type) {
1450 		case RTM_IFINFO:
1451 			memcpy(&ifm, next, sizeof(ifm));
1452 			if_change(ifm.ifm_index, ifm.ifm_flags, &ifm.ifm_data,
1453 			    (struct sockaddr_dl *)rti_info[RTAX_IFP]);
1454 			break;
1455 		case RTM_NEWADDR:
1456 			ifam = (struct ifa_msghdr *)rtm;
1457 			if ((ifam->ifam_addrs & (RTA_NETMASK | RTA_IFA |
1458 			    RTA_BRD)) == 0)
1459 				break;
1460 
1461 			if_newaddr(ifam->ifam_index,
1462 			    (struct sockaddr *)rti_info[RTAX_IFA],
1463 			    (struct sockaddr *)rti_info[RTAX_NETMASK],
1464 			    (struct sockaddr *)rti_info[RTAX_BRD]);
1465 			break;
1466 		case RTM_DELADDR:
1467 			ifam = (struct ifa_msghdr *)rtm;
1468 			if ((ifam->ifam_addrs & (RTA_NETMASK | RTA_IFA |
1469 			    RTA_BRD)) == 0)
1470 				break;
1471 
1472 			if_deladdr(ifam->ifam_index,
1473 			    (struct sockaddr *)rti_info[RTAX_IFA],
1474 			    (struct sockaddr *)rti_info[RTAX_NETMASK],
1475 			    (struct sockaddr *)rti_info[RTAX_BRD]);
1476 			break;
1477 		case RTM_IFANNOUNCE:
1478 			if_announce(next);
1479 			break;
1480 		default:
1481 			/* ignore for now */
1482 			break;
1483 		}
1484 	}
1485 
1486 	return (offset);
1487 }
1488 
1489 static int
1490 rtmsg_process_route(struct rt_msghdr *rtm, struct sockaddr *rti_info[RTAX_MAX])
1491 {
1492 	struct sockaddr		*sa;
1493 	struct sockaddr_in	*sa_in;
1494 	struct sockaddr_in6	*sa_in6;
1495 	struct kroute		 kr;
1496 	struct kroute_prefix	*kp;
1497 	struct kroute_priority	*kprio;
1498 	struct kroute_node	*kn;
1499 
1500 	if ((sa = rti_info[RTAX_DST]) == NULL)
1501 		return (-1);
1502 
1503 	memset(&kr, 0, sizeof(kr));
1504 	kr.af = sa->sa_family;
1505 	switch (kr.af) {
1506 	case AF_INET:
1507 		kr.prefix.v4 = ((struct sockaddr_in *)sa)->sin_addr;
1508 		sa_in = (struct sockaddr_in *) rti_info[RTAX_NETMASK];
1509 		if (sa_in != NULL && sa_in->sin_len != 0)
1510 			kr.prefixlen = mask2prefixlen(sa_in->sin_addr.s_addr);
1511 		else if (rtm->rtm_flags & RTF_HOST)
1512 			kr.prefixlen = 32;
1513 		else if (kr.prefix.v4.s_addr == INADDR_ANY)
1514 			kr.prefixlen = 0;
1515 		else
1516 			kr.prefixlen = prefixlen_classful(kr.prefix.v4.s_addr);
1517 		break;
1518 	case AF_INET6:
1519 		kr.prefix.v6 = ((struct sockaddr_in6 *)sa)->sin6_addr;
1520 		sa_in6 = (struct sockaddr_in6 *)rti_info[RTAX_NETMASK];
1521 		if (sa_in6 != NULL && sa_in6->sin6_len != 0)
1522 			kr.prefixlen = mask2prefixlen6(sa_in6);
1523 		else if (rtm->rtm_flags & RTF_HOST)
1524 			kr.prefixlen = 128;
1525 		else if (IN6_IS_ADDR_UNSPECIFIED(&kr.prefix.v6))
1526 			kr.prefixlen = 0;
1527 		else
1528 			fatalx("in6 net addr without netmask");
1529 		break;
1530 	default:
1531 		return (0);
1532 	}
1533 	kr.ifindex = rtm->rtm_index;
1534 	if ((sa = rti_info[RTAX_GATEWAY]) != NULL) {
1535 		switch (sa->sa_family) {
1536 		case AF_INET:
1537 			kr.nexthop.v4 = ((struct sockaddr_in *)sa)->sin_addr;
1538 			break;
1539 		case AF_INET6:
1540 			sa_in6 = (struct sockaddr_in6 *)sa;
1541 			recoverscope(sa_in6);
1542 			kr.nexthop.v6 = sa_in6->sin6_addr;
1543 			if (sa_in6->sin6_scope_id)
1544 				kr.ifindex = sa_in6->sin6_scope_id;
1545 			break;
1546 		case AF_LINK:
1547 			kr.flags |= F_CONNECTED;
1548 			break;
1549 		}
1550 	}
1551 
1552 	if (rtm->rtm_flags & RTF_STATIC)
1553 		kr.flags |= F_STATIC;
1554 	if (rtm->rtm_flags & RTF_BLACKHOLE)
1555 		kr.flags |= F_BLACKHOLE;
1556 	if (rtm->rtm_flags & RTF_REJECT)
1557 		kr.flags |= F_REJECT;
1558 	if (rtm->rtm_flags & RTF_DYNAMIC)
1559 		kr.flags |= F_DYNAMIC;
1560 	/* routes attached to connected or loopback interfaces */
1561 	if (rtm->rtm_flags & RTF_CONNECTED ||
1562 	    ldp_addrcmp(kr.af, &kr.prefix, &kr.nexthop) == 0)
1563 		kr.flags |= F_CONNECTED;
1564 	kr.priority = rtm->rtm_priority;
1565 
1566 	if (rtm->rtm_type == RTM_CHANGE) {
1567 		/*
1568 		 * The kernel doesn't allow RTM_CHANGE for multipath routes.
1569 		 * If we got this message we know that the route has only one
1570 		 * nexthop and we should remove it before installing the same
1571 		 * route with the new nexthop.
1572 		 */
1573 		kp = kroute_find_prefix(kr.af, &kr.prefix, kr.prefixlen);
1574 		if (kp) {
1575 			kprio = kroute_find_prio(kp, kr.priority);
1576 			if (kprio) {
1577 				kn = TAILQ_FIRST(&kprio->nexthops);
1578 				if (kn)
1579 					kroute_remove(&kn->r);
1580 			}
1581 		}
1582 	}
1583 
1584 	kn = NULL;
1585 	kp = kroute_find_prefix(kr.af, &kr.prefix, kr.prefixlen);
1586 	if (kp) {
1587 		kprio = kroute_find_prio(kp, kr.priority);
1588 		if (kprio)
1589 			kn = kroute_find_gw(kprio, &kr.nexthop);
1590 	}
1591 
1592 	if (rtm->rtm_type == RTM_DELETE) {
1593 		if (kn == NULL)
1594 			return (0);
1595 		return (kroute_remove(&kr));
1596 	}
1597 
1598 	if (!ldp_addrisset(kr.af, &kr.nexthop) && !(kr.flags & F_CONNECTED)) {
1599 		log_warnx("%s: no nexthop for %s/%u", __func__,
1600 		    log_addr(kr.af, &kr.prefix), kr.prefixlen);
1601 		return (-1);
1602 	}
1603 
1604 	if (kn != NULL) {
1605 		/* update route */
1606 		kn->r = kr;
1607 		kr_redistribute(kp);
1608 	} else {
1609 		kr.local_label = NO_LABEL;
1610 		kr.remote_label = NO_LABEL;
1611 		kroute_insert(&kr);
1612 	}
1613 
1614 	return (0);
1615 }
1616 
1617 int
1618 kmpw_set(struct kpw *kpw)
1619 {
1620 	struct kif_node		*kif;
1621 
1622 	kif = kif_find(kpw->ifindex);
1623 	if (kif == NULL) {
1624 		log_warnx("%s: failed to find mpw by index (%u)", __func__,
1625 		    kpw->ifindex);
1626 		return (-1);
1627 	}
1628 
1629 	if (kif->kpw == NULL)
1630 		kif->kpw = malloc(sizeof(*kif->kpw));
1631 	*kif->kpw = *kpw;
1632 
1633 	return (kmpw_install(kif->k.ifname, kpw));
1634 }
1635 
1636 int
1637 kmpw_unset(struct kpw *kpw)
1638 {
1639 	struct kif_node		*kif;
1640 
1641 	kif = kif_find(kpw->ifindex);
1642 	if (kif == NULL) {
1643 		log_warnx("%s: failed to find mpw by index (%u)", __func__,
1644 		    kpw->ifindex);
1645 		return (-1);
1646 	}
1647 
1648 	if (kif->kpw == NULL) {
1649 		log_warnx("%s: %s is not set", __func__, kif->k.ifname);
1650 		return (-1);
1651 	}
1652 
1653 	free(kif->kpw);
1654 	kif->kpw = NULL;
1655 	return (kmpw_uninstall(kif->k.ifname));
1656 }
1657 
1658 static int
1659 kmpw_install(const char *ifname, struct kpw *kpw)
1660 {
1661 	struct ifreq		 ifr;
1662 	struct ifmpwreq		 imr;
1663 
1664 	memset(&imr, 0, sizeof(imr));
1665 	switch (kpw->pw_type) {
1666 	case PW_TYPE_ETHERNET:
1667 		imr.imr_type = IMR_TYPE_ETHERNET;
1668 		break;
1669 	case PW_TYPE_ETHERNET_TAGGED:
1670 		imr.imr_type = IMR_TYPE_ETHERNET_TAGGED;
1671 		break;
1672 	default:
1673 		log_warnx("%s: unhandled pseudowire type (%#X)", __func__,
1674 		    kpw->pw_type);
1675 		return (-1);
1676 	}
1677 
1678 	if (kpw->flags & F_PW_CWORD)
1679 		imr.imr_flags |= IMR_FLAG_CONTROLWORD;
1680 
1681 	memcpy(&imr.imr_nexthop, addr2sa(kpw->af, &kpw->nexthop, 0),
1682 	    sizeof(imr.imr_nexthop));
1683 
1684 	imr.imr_lshim.shim_label = kpw->local_label;
1685 	imr.imr_rshim.shim_label = kpw->remote_label;
1686 
1687 	memset(&ifr, 0, sizeof(ifr));
1688 	strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1689 	ifr.ifr_data = (caddr_t) &imr;
1690 	if (ioctl(kr_state.ioctl_fd, SIOCSETMPWCFG, &ifr)) {
1691 		log_warn("ioctl SIOCSETMPWCFG");
1692 		return (-1);
1693 	}
1694 
1695 	return (0);
1696 }
1697 
1698 static int
1699 kmpw_uninstall(const char *ifname)
1700 {
1701 	struct ifreq		 ifr;
1702 	struct ifmpwreq		 imr;
1703 
1704 	memset(&ifr, 0, sizeof(ifr));
1705 	memset(&imr, 0, sizeof(imr));
1706 	strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1707 	ifr.ifr_data = (caddr_t) &imr;
1708 	if (ioctl(kr_state.ioctl_fd, SIOCSETMPWCFG, &ifr)) {
1709 		log_warn("ioctl SIOCSETMPWCFG");
1710 		return (-1);
1711 	}
1712 
1713 	return (0);
1714 }
1715