xref: /minix3/minix/net/lwip/route.c (revision ef8d499e2d2af900e9b2ab297171d7b088652482)
1 /* LWIP service - route.c - route management */
2 /*
3  * This module provides a destination-based routing implementation, roughly
4  * matching the routing as done traditionally by the BSDs and by current NetBSD
5  * in particular.  As such, this implementation almost completely replaces
6  * lwIP's own more limited (and less rigid) routing algorithms.  It does this
7  * using a combination of overriding lwIP functions (ip4_route, ip6_route) with
8  * weak-symbol patching, and lwIP-provided gateway hooks.  Especially the
9  * former gives us a level of control that lwIP's routing hooks do not provide:
10  * not only does such overriding give us the ability to flag that no route was
11  * found at all, we also bypass a number of default decisions taken by lwIP
12  * where the routing hooks are not called at all.
13  *
14  * As a result, the routing tables as visible to the user are an almost
15  * completely accurate reflection of the routing decisions taken by this TCP/IP
16  * stack in practice.  There is currently only one exception: for IPv4 gateway
17  * selection, lwIP will bypass the gateway hook if the given address is on the
18  * local subnet according to the locally assigned IP address and subnet mask.
 * This exception should practically affect no one, though.
20  *
21  * Our routing implementation differs from NetBSD's in various aspects, though.
22  * Perhaps the most important one, also noted elsewhere, is that we do not
23  * support the coexistence of an all-bits-set network route and a host route
24  * for the same IP address.  If necessary, this issue can be resolved.
25  *
26  * We use a custom concept of "immutable" routes for local addresses, which are
27  * a somewhat special case as explained in the ifaddr module.  Since those
28  * RTF_LOCAL routes cannot be deleted, a small change is made to the route(8)
29  * flush-all command to skip them.  Packets directed at local addresses on
30  * non-loopback interfaces are handled in a way that differs from NetBSD's,
31  * too.  This is explained in the ifdev module.
32  *
33  * The BSDs support special routes that reject or blackhole packets, based on
34  * routing flags.  We support such routes as well, but implement them somewhat
35  * differently from the BSDs: such packets always get routed over a loopback
36  * interface (regardless of their associated interface), in order to save on
37  * routing lookups for packets in the common case.
38  *
39  * As general rules of thumb: if there is no route to a destination, assignment
40  * of a local address will already fail with a "no route to host" error.  If
41  * there is an RTF_REJECT route, a local address will be assigned, but actual
42  * packets will be routed to a loopback interface and result in a "no route to
43  * host" error upon reception there - this is what NetBSD seems to do too, even
44  * though the documentation says that RTF_REJECT routes generate ICMP messages
45  * instead.  RTF_BLACKHOLE behaves similarly to RTF_REJECT, except that the
46  * packet is simply discarded upon receipt by the loopback interface.
47  *
48  * In various places, both here and elsewhere, we check to make sure that on
49  * routing and output, scoped IPv6 source and destination addresses never leave
50  * their zone.  For example, a packet must not be sent to an outgoing interface
51  * if its source address is a link-local address with a zone for another
52  * interface.  lwIP does not check for such violations, and so we must make
53  * sure that this does not happen ourselves.
54  *
55  * Normally, one would tell lwIP to use a particular default IPv4 gateway by
56  * associating the gateway address to a particular interface, and then setting
57  * that interface as default interface (netif_default).  We explicitly do
58  * neither of these things.  Instead, the routing hooks should return the
59  * default route whenever applicable, and the gateway hooks should return the
60  * default route's gateway IP address whenever needed.
61  *
62  * Due to lwIP's limited set of error codes, we do not properly distinguish
63  * between cases where EHOSTUNREACH or ENETUNREACH should be thrown, and throw
64  * the former in most cases.
65  */
66 
67 #include "lwip.h"
68 #include "ifaddr.h"
69 #include "rttree.h"
70 #include "rtsock.h"
71 #include "route.h"
72 #include "lldata.h"
73 
74 #include "lwip/nd6.h"
75 
76 /*
77  * The maximum number of uint8_t bytes needed to represent a routing address.
78  * This value is the maximum of 4 (for IPv4) and 16 (for IPv6).
79  */
80 #define ROUTE_ADDR_MAX	(MAX(IP4_BITS, IP6_BITS) / NBBY)
81 
82 /*
83  * We use a shared routing entry data structure for IPv4 and IPv6 routing
84  * entries.  The result is cleaner code at the cost of (currently) about 2.3KB
85  * of memory wasted (costing 12 bytes per address for three addresses for 64 of
86  * the 128 routing entries that would be for IPv4), although with the benefit
87  * that either address family may use more than half of the routing entries.
88  * From that 2.3KB, 1KB can be reclaimed by moving the destination address and
89  * mask into the rttree_entry data structure, at the cost of its generality.
90  */
91 struct route_entry {
92 	struct rttree_entry re_entry;		/* routing tree entry */
93 	union pxfer_re_pu {
94 		struct ifdev *repu_ifdev;	/* associated interface */
95 		SIMPLEQ_ENTRY(route_entry) repu_next;	/* next free pointer */
96 	} re_pu;
97 	unsigned int re_flags;			/* routing flags (RTF_) */
98 	unsigned int re_use;			/* number of times used */
99 	uint8_t re_addr[ROUTE_ADDR_MAX];	/* destination address */
100 	uint8_t re_mask[ROUTE_ADDR_MAX];	/* destination mask */
101 	union ixfer_re_gu {
102 		ip4_addr_p_t regu_gw4;		/* gateway (IPv4) */
103 		ip6_addr_p_t regu_gw6;		/* gateway (IPv6) */
104 	} re_gu;
105 };
106 #define re_ifdev	re_pu.repu_ifdev
107 #define re_next		re_pu.repu_next
108 #define re_gw4		re_gu.regu_gw4
109 #define re_gw6		re_gu.regu_gw6
110 
111 /* Routes for local addresses are immutable, for reasons explained in ifdev. */
112 #define route_is_immutable(route)	((route)->re_flags & RTF_LOCAL)
113 
114 /*
115  * We override a subset of the BSD routing flags in order to store our own
116  * local settings.  In particular, we have to have a way to store whether a
117  * route is for an IPv4 or IPv6 destination address.  We override BSD's
118  * RTF_DONE flag for this: RTF_DONE is only used with routing sockets, and
119  * never associated with actual routes.  In contrast, RTF_IPV6 is only used
120  * with actual routes, and never sent across routing sockets.  In general,
121  * overriding flags is preferable to adding new ones, as BSD might later add
122  * more flags itself as well, while it can never remove existing flags.
123  */
124 #define RTF_IPV6	RTF_DONE	/* route is for an IPv6 destination */
125 
126 /* The total number of routing entries (IPv4 and IPv6 combined). */
127 #define NR_ROUTE_ENTRY	128
128 
129 static struct route_entry route_array[NR_ROUTE_ENTRY];	/* routing entries */
130 
131 static SIMPLEQ_HEAD(, route_entry) route_freelist;	/* free entry list */
132 
133 /* The routing trees.  There are two: one for IPv4 and one for IPv6. */
134 #define ROUTE_TREE_V4	0
135 #define ROUTE_TREE_V6	1
136 #define NR_ROUTE_TREE	2
137 
138 static struct rttree route_tree[NR_ROUTE_TREE];
139 
140 /* We support a single cached routing entry per address family (IPv4, IPv6). */
141 static int rtcache_v4set;
142 static ip4_addr_t rtcache_v4addr;
143 static struct route_entry *rtcache_v4route;
144 
145 static int rtcache_v6set;
146 static ip6_addr_t rtcache_v6addr;
147 static struct route_entry *rtcache_v6route;
148 
149 /*
150  * Initialize the routing cache.  There are a lot of trivial functions here,
151  * but this is designed to be extended in the future.
152  */
153 static void
rtcache_init(void)154 rtcache_init(void)
155 {
156 
157 	rtcache_v4set = FALSE;
158 	rtcache_v6set = FALSE;
159 }
160 
161 /*
162  * Look up the given IPv4 address in the routing cache.  If there is a match,
163  * return TRUE with the associated route in 'route', possibly NULL if a
164  * negative result was cached.  Return FALSE if the routing cache does not
165  * cache the given IPv4 address.
166  */
167 static inline int
rtcache_lookup_v4(const ip4_addr_t * ipaddr,struct route_entry ** route)168 rtcache_lookup_v4(const ip4_addr_t * ipaddr, struct route_entry ** route)
169 {
170 
171 	if (rtcache_v4set && ip4_addr_cmp(&rtcache_v4addr, ipaddr)) {
172 		*route = rtcache_v4route;
173 
174 		return TRUE;
175 	} else
176 		return FALSE;
177 }
178 
179 /*
180  * Add the given IPv4 address and the given routing entry (NULL for negative
181  * caching) to the routing cache.
182  */
183 static inline void
rtcache_add_v4(const ip4_addr_t * ipaddr,struct route_entry * route)184 rtcache_add_v4(const ip4_addr_t * ipaddr, struct route_entry * route)
185 {
186 
187 	rtcache_v4addr = *ipaddr;
188 	rtcache_v4route = route;
189 	rtcache_v4set = TRUE;
190 }
191 
192 /*
193  * Reset the IPv4 routing cache.
194  */
195 static void
rtcache_reset_v4(void)196 rtcache_reset_v4(void)
197 {
198 
199 	rtcache_v4set = FALSE;
200 }
201 
202 /*
203  * Look up the given IPv6 address in the routing cache.  If there is a match,
204  * return TRUE with the associated route in 'route', possibly NULL if a
205  * negative result was cached.  Return FALSE if the routing cache does not
206  * cache the given IPv6 address.
207  */
208 static inline int
rtcache_lookup_v6(const ip6_addr_t * ipaddr,struct route_entry ** route)209 rtcache_lookup_v6(const ip6_addr_t * ipaddr, struct route_entry ** route)
210 {
211 
212 	if (rtcache_v6set && ip6_addr_cmp(&rtcache_v6addr, ipaddr)) {
213 		*route = rtcache_v6route;
214 
215 		return TRUE;
216 	} else
217 		return FALSE;
218 }
219 
220 /*
221  * Add the given IPv6 address and the given routing entry (NULL for negative
222  * caching) to the routing cache.  Caching of scoped addresses without zones is
223  * not supported.
224  */
225 static inline void
rtcache_add_v6(const ip6_addr_t * ipaddr,struct route_entry * route)226 rtcache_add_v6(const ip6_addr_t * ipaddr, struct route_entry * route)
227 {
228 
229 	rtcache_v6addr = *ipaddr;
230 	rtcache_v6route = route;
231 	rtcache_v6set = TRUE;
232 }
233 
234 /*
235  * Reset the IPv6 routing cache.
236  */
237 static void
rtcache_reset_v6(void)238 rtcache_reset_v6(void)
239 {
240 
241 	rtcache_v6set = FALSE;
242 }
243 
244 /*
245  * Initialize the routing module.
246  */
247 void
route_init(void)248 route_init(void)
249 {
250 	unsigned int slot;
251 
252 	/* Initialize the routing trees. */
253 	rttree_init(&route_tree[ROUTE_TREE_V4], IP4_BITS);
254 	rttree_init(&route_tree[ROUTE_TREE_V6], IP6_BITS);
255 
256 	/* Initialize the list of free routing entries. */
257 	SIMPLEQ_INIT(&route_freelist);
258 
259 	for (slot = 0; slot < __arraycount(route_array); slot++)
260 		SIMPLEQ_INSERT_TAIL(&route_freelist, &route_array[slot],
261 		    re_next);
262 
263 	/* Reset the routing cache. */
264 	rtcache_init();
265 }
266 
267 /*
268  * Prepare for a routing tree operation by converting the given IPv4 address
269  * into a raw address that can be used in that routing tree operation.
270  */
271 static inline void
route_prepare_v4(const ip4_addr_t * ip4addr,uint8_t rtaddr[ROUTE_ADDR_MAX])272 route_prepare_v4(const ip4_addr_t * ip4addr, uint8_t rtaddr[ROUTE_ADDR_MAX])
273 {
274 	uint32_t val;
275 
276 	val = ip4_addr_get_u32(ip4addr);
277 
278 	memcpy(rtaddr, &val, sizeof(val));
279 }
280 
281 /*
282  * Prepare for a routing tree operation by converting the given IPv6 address
283  * into a raw address that can be used in that routing tree operation.  If the
284  * given prefix length allows for it, also incorporate the address zone.
285  */
286 static inline void
route_prepare_v6(const ip6_addr_t * ip6addr,unsigned int prefix,uint8_t rtaddr[ROUTE_ADDR_MAX])287 route_prepare_v6(const ip6_addr_t * ip6addr, unsigned int prefix,
288 	uint8_t rtaddr[ROUTE_ADDR_MAX])
289 {
290 
291 	assert(sizeof(ip6addr->addr) == IP6_BITS / NBBY);
292 
293 	/*
294 	 * TODO: in most cases, we could actually return a pointer to the
295 	 * address contained in the given lwIP IP address structure.  However,
296 	 * doing so would make a lot things quite a bit messier around here,
297 	 * but the small performance gain may still make it worth it.
298 	 */
299 	memcpy(rtaddr, ip6addr->addr, sizeof(ip6addr->addr));
300 
301 	/*
302 	 * Embed the zone ID into the address, KAME style.  This is the
303 	 * easiest way to have link-local addresses for multiple interfaces
304 	 * coexist in a single routing tree.  Do this only if the full zone ID
305 	 * would be included in the prefix though, or we might de-normalize the
306 	 * address.
307 	 */
308 	if (ip6_addr_has_zone(ip6addr) && prefix >= 32)
309 		rtaddr[3] = ip6_addr_zone(ip6addr);
310 }
311 
312 /*
313  * Prepare for a routing tree operation by converting the given IP address into
314  * a raw address that can be used in that routing tree operation.  The given
315  * address's zone ID is embedded "KAME-style" into the raw (IPv6) address when
316  * applicable and if the given prefix length allows for it.  Return the index
317  * of the routing tree to use (ROUTE_TREE_V4 or ROUTE_TREE_V6).
318  */
319 static unsigned int
route_prepare(const ip_addr_t * ipaddr,unsigned int prefix,uint8_t rtaddr[ROUTE_ADDR_MAX])320 route_prepare(const ip_addr_t * ipaddr, unsigned int prefix,
321 	uint8_t rtaddr[ROUTE_ADDR_MAX])
322 {
323 
324 	switch (IP_GET_TYPE(ipaddr)) {
325 	case IPADDR_TYPE_V4:
326 		route_prepare_v4(ip_2_ip4(ipaddr), rtaddr);
327 
328 		return ROUTE_TREE_V4;
329 
330 	case IPADDR_TYPE_V6:
331 		route_prepare_v6(ip_2_ip6(ipaddr), prefix, rtaddr);
332 
333 		return ROUTE_TREE_V6;
334 
335 	default:
336 		panic("unknown IP address type: %u", IP_GET_TYPE(ipaddr));
337 	}
338 }
339 
340 /*
341  * The given routing tree (ROUTE_TREE_V4 or ROUTE_TREE_V6) has been updated.
342  * Invalidate any cache entries that may now have become stale, both locally
343  * and in lwIP.
344  */
345 static void
route_updated(unsigned int tree)346 route_updated(unsigned int tree)
347 {
348 
349 	if (tree == ROUTE_TREE_V6) {
350 		rtcache_reset_v6();
351 
352 		/*
353 		 * Also clear the lwIP ND6 destination cache, which may now
354 		 * contain entries for the wrong gateway.
355 		 */
356 		nd6_clear_destination_cache();
357 	} else
358 		rtcache_reset_v4();
359 }
360 
361 /*
362  * Add a route to the appropriate routing table.  The address, address zone,
363  * prefix, and RTF_HOST flag in the flags field make up the identity of the
364  * route.  If the flags field contains RTF_GATEWAY, a gateway must be given;
365  * otherwise, it must be NULL.  The route is associated with the given
366  * interface, which may not be NULL.  The caller must ensure that the flags
367  * field does not contain unsupported flags.  On success, return OK, and also
368  * also announce the addition.  On failure, return a negative error code.
369  */
370 int
route_add(const ip_addr_t * addr,unsigned int prefix,const ip_addr_t * gateway,struct ifdev * ifdev,unsigned int flags,const struct rtsock_request * rtr)371 route_add(const ip_addr_t * addr, unsigned int prefix,
372 	const ip_addr_t * gateway, struct ifdev * ifdev, unsigned int flags,
373 	const struct rtsock_request * rtr)
374 {
375 	struct route_entry *route;
376 	unsigned int tree, byte;
377 	int r;
378 
379 	assert(flags & RTF_UP);
380 	assert(!!(flags & RTF_GATEWAY) == (gateway != NULL));
381 	assert(ifdev != NULL);
382 
383 	/* Get a routing entry, if any are available. */
384 	if (SIMPLEQ_EMPTY(&route_freelist))
385 		return ENOBUFS;
386 
387 	route = SIMPLEQ_FIRST(&route_freelist);
388 
389 	/*
390 	 * Perform sanity checks on the input, and fill in enough of the
391 	 * routing entry to be able to try and add it to the routing tree.
392 	 */
393 	memset(route->re_addr, 0, sizeof(route->re_addr));
394 
395 	tree = route_prepare(addr, prefix, route->re_addr);
396 
397 	switch (tree) {
398 	case ROUTE_TREE_V4:
399 		if (prefix > IP4_BITS ||
400 		    (prefix != IP4_BITS && (flags & RTF_HOST)))
401 			return EINVAL;
402 
403 		flags &= ~RTF_IPV6;
404 
405 		break;
406 
407 	case ROUTE_TREE_V6:
408 		if (prefix > IP6_BITS ||
409 		    (prefix != IP6_BITS && (flags & RTF_HOST)))
410 			return EINVAL;
411 
412 		flags |= RTF_IPV6;
413 
414 		break;
415 
416 	default:
417 		return EINVAL;
418 	}
419 
420 	/* Generate the (raw) network mask.  This is protocol agnostic! */
421 	addr_make_netmask(route->re_mask, sizeof(route->re_mask), prefix);
422 
423 	/* The given address must be normalized to its mask. */
424 	for (byte = 0; byte < __arraycount(route->re_addr); byte++)
425 		if ((route->re_addr[byte] & ~route->re_mask[byte]) != 0)
426 			return EINVAL;
427 
428 	/*
429 	 * Attempt to add the routing entry.  Host-type entries do not have an
430 	 * associated mask, enabling ever-so-slightly faster matching.
431 	 */
432 	if ((r = rttree_add(&route_tree[tree], &route->re_entry,
433 	    route->re_addr, (flags & RTF_HOST) ? NULL : route->re_mask,
434 	    prefix)) != OK)
435 		return r;
436 
437 	/*
438 	 * Success.  Finish the routing entry.  Remove the entry from the free
439 	 * list before assigning re_ifdev, as these two use the same memory.
440 	 */
441 	SIMPLEQ_REMOVE_HEAD(&route_freelist, re_next);
442 
443 	route->re_ifdev = ifdev;
444 	route->re_flags = flags;
445 
446 	/*
447 	 * Store the gateway if one is given.  Store the address in lwIP format
448 	 * because that is the easiest way use it later again.  Store it as a
449 	 * union to keep the route entry structure as small as possible.  Store
450 	 * the address without its zone, because the gateway's address zone is
451 	 * implied by its associated ifdev.
452 	 *
453 	 * If no gateway is given, this is a link-type route, i.e., a route for
454 	 * a local network, with all nodes directly connected and reachable.
455 	 */
456 	if (flags & RTF_GATEWAY) {
457 		if (flags & RTF_IPV6)
458 			ip6_addr_copy_to_packed(route->re_gw6,
459 			    *ip_2_ip6(gateway));
460 		else
461 			ip4_addr_copy(route->re_gw4, *ip_2_ip4(gateway));
462 	}
463 
464 	/* We have made routing changes. */
465 	route_updated(tree);
466 
467 	/* Announce the route addition. */
468 	rtsock_msg_route(route, RTM_ADD, rtr);
469 
470 	return OK;
471 }
472 
473 /*
474  * Check whether it is possible to add a route for the given destination to the
475  * corresponding routing table, that is, a subsequent route_add() call for this
476  * destination address is guaranteed to succeed (if all its parameters are
477  * valid).  Return TRUE if adding the route is guaranteed to succeed, or FALSE
478  * if creating a route for the given destination would fail.
479  */
480 int
route_can_add(const ip_addr_t * addr,unsigned int prefix,int is_host __unused)481 route_can_add(const ip_addr_t * addr, unsigned int prefix,
482 	int is_host __unused)
483 {
484 	uint8_t rtaddr[ROUTE_ADDR_MAX];
485 	unsigned int tree;
486 
487 	tree = route_prepare(addr, prefix, rtaddr);
488 
489 	/*
490 	 * The corresponding routing tree must not already contain an exact
491 	 * match for the destination.  If the routing tree implementation is
492 	 * ever extended with support for coexisting host and net entries with
493 	 * the same prefix, we should also pass in 'is_host' here.
494 	 */
495 	if (rttree_lookup_exact(&route_tree[tree], rtaddr, prefix) != NULL)
496 		return FALSE;
497 
498 	/* There must be a routing entry on the free list as well. */
499 	return !SIMPLEQ_EMPTY(&route_freelist);
500 }
501 
502 /*
503  * Find a route with the exact given route identity.  Return the route if
504  * found, or NULL if no route exists with this identity.
505  */
506 struct route_entry *
route_find(const ip_addr_t * addr,unsigned int prefix,int is_host)507 route_find(const ip_addr_t * addr, unsigned int prefix, int is_host)
508 {
509 	struct rttree_entry *entry;
510 	struct route_entry *route;
511 	uint8_t rtaddr[ROUTE_ADDR_MAX];
512 	unsigned int tree;
513 
514 	tree = route_prepare(addr, prefix, rtaddr);
515 
516 	entry = rttree_lookup_exact(&route_tree[tree], rtaddr, prefix);
517 	if (entry == NULL)
518 		return NULL;
519 
520 	route = (struct route_entry *)entry;
521 
522 	/*
523 	 * As long as the routing tree code does not support coexisting host
524 	 * and net entries with the same prefix, we have to check the type.
525 	 */
526 	if (!!(route->re_flags & RTF_HOST) != is_host)
527 		return NULL;
528 
529 	return route;
530 }
531 
532 /*
533  * A route lookup failed for the given IP address.  Generate an RTM_MISS
534  * message on routing sockets.
535  */
536 static void
route_miss(const ip_addr_t * ipaddr)537 route_miss(const ip_addr_t * ipaddr)
538 {
539 	union sockaddr_any addr;
540 	socklen_t addr_len;
541 
542 	addr_len = sizeof(addr);
543 
544 	addr_put_inet(&addr.sa, &addr_len, ipaddr, TRUE /*kame*/, 0 /*port*/);
545 
546 	rtsock_msg_miss(&addr.sa);
547 }
548 
549 /*
550  * A route lookup failed for the given IPv4 address.  Generate an RTM_MISS
551  * message on routing sockets.
552  */
553 static void
route_miss_v4(const ip4_addr_t * ip4addr)554 route_miss_v4(const ip4_addr_t * ip4addr)
555 {
556 	ip_addr_t ipaddr;
557 
558 	ip_addr_copy_from_ip4(ipaddr, *ip4addr);
559 
560 	route_miss(&ipaddr);
561 }
562 
563 /*
564  * A route lookup failed for the given IPv6 address.  Generate an RTM_MISS
565  * message on routing sockets.
566  */
567 static void
route_miss_v6(const ip6_addr_t * ip6addr)568 route_miss_v6(const ip6_addr_t * ip6addr)
569 {
570 	ip_addr_t ipaddr;
571 
572 	ip_addr_copy_from_ip6(ipaddr, *ip6addr);
573 
574 	route_miss(&ipaddr);
575 }
576 
577 /*
578  * Look up the most narrow matching routing entry for the given IPv4 address.
579  * Return the routing entry if one exists at all, or NULL otherwise.  This
580  * function performs caching.
581  */
582 static inline struct route_entry *
route_lookup_v4(const ip4_addr_t * ip4addr)583 route_lookup_v4(const ip4_addr_t * ip4addr)
584 {
585 	uint8_t rtaddr[ROUTE_ADDR_MAX];
586 	struct route_entry *route;
587 
588 	/*
589 	 * Look up the route for the destination IP address, unless we have a
590 	 * cached route entry.  We cache negatives in order to avoid generating
591 	 * lots of RTM_MISS messages for the same destination in a row.
592 	 */
593 	if (rtcache_lookup_v4(ip4addr, &route))
594 		return route;
595 
596 	route_prepare_v4(ip4addr, rtaddr);
597 
598 	route = (struct route_entry *)
599 	    rttree_lookup_match(&route_tree[ROUTE_TREE_V4], rtaddr);
600 
601 	/* Cache the result, even if we found no route. */
602 	rtcache_add_v4(ip4addr, route);
603 
604 	return route;
605 }
606 
607 /*
608  * Look up the most narrow matching routing entry for the given IPv6 address,
609  * taking into account its zone ID if applicable.  Return the routing entry if
610  * one exists at all, or NULL otherwise.  This function performs caching.
611  */
612 static inline struct route_entry *
route_lookup_v6(const ip6_addr_t * ip6addr)613 route_lookup_v6(const ip6_addr_t * ip6addr)
614 {
615 	uint8_t rtaddr[ROUTE_ADDR_MAX];
616 	struct route_entry *route;
617 	int use_cache;
618 
619 	/*
620 	 * We do not support caching of addresses that should have a zone but
621 	 * do not: in different contexts, such addresses could yield different
622 	 * routes.
623 	 */
624 	use_cache = !ip6_addr_lacks_zone(ip6addr, IP6_UNKNOWN);
625 
626 	if (use_cache && rtcache_lookup_v6(ip6addr, &route))
627 		return route;
628 
629 	route_prepare_v6(ip6addr, IP6_BITS, rtaddr);
630 
631 	route = (struct route_entry *)
632 	    rttree_lookup_match(&route_tree[ROUTE_TREE_V6], rtaddr);
633 
634 	/* Cache the result, even if no route was found. */
635 	if (use_cache)
636 		rtcache_add_v6(ip6addr, route);
637 
638 	return route;
639 }
640 
641 /*
642  * Look up the most narrow matching routing entry for the given IP address,
643  * taking into account its zone ID if applicable.  Return the routing entry if
644  * one exists at all, or NULL otherwise.  This function performs caching.
645  */
646 struct route_entry *
route_lookup(const ip_addr_t * addr)647 route_lookup(const ip_addr_t * addr)
648 {
649 
650 	if (IP_IS_V4(addr))
651 		return route_lookup_v4(ip_2_ip4(addr));
652 	else
653 		return route_lookup_v6(ip_2_ip6(addr));
654 }
655 
656 /*
657  * Change an existing routing entry.  Its flags are always updated to the new
658  * set of given flags, although certain flags are always preserved.  If the
659  * new flags set has RTF_GATEWAY set and 'gateway' is not NULL, update the
660  * gateway associated with the route.  If 'ifdev' is not NULL, reassociate the
661  * route with the given interface; this will not affect the zone of the
662  * route's destination address.  On success, return OK, and also announce the
663  * change.  On failure, return a negative error code.
664  */
665 static int
route_change(struct route_entry * route,const ip_addr_t * gateway,struct ifdev * ifdev,unsigned int flags,const struct rtsock_request * rtr)666 route_change(struct route_entry * route, const ip_addr_t * gateway,
667 	struct ifdev * ifdev, unsigned int flags,
668 	const struct rtsock_request * rtr)
669 {
670 	unsigned int tree, preserve;
671 
672 	tree = (route->re_flags & RTF_IPV6) ? ROUTE_TREE_V6 : ROUTE_TREE_V4;
673 
674 	/* Update the associated interface (only) if a new one is given. */
675 	if (ifdev != NULL)
676 		route->re_ifdev = ifdev;
677 
678 	/*
679 	 * These flags may not be changed.  RTF_UP should always be set anyway.
680 	 * RTF_HOST and RTF_IPV6 are part of the route's identity.  RTF_LOCAL
681 	 * should be preserved as well, although we will not get here if either
682 	 * the old or the new flags have it set anyway.
683 	 */
684 	preserve = RTF_UP | RTF_HOST | RTF_IPV6 | RTF_LOCAL;
685 
686 	/* Always update the flags.  There is no way not to. */
687 	route->re_flags = (route->re_flags & preserve) | (flags & ~preserve);
688 
689 	/*
690 	 * If a new gateway is given *and* RTF_GATEWAY is set, update the
691 	 * gateway.  If RTF_GATEWAY is not set, this is a link-type route with
692 	 * no gateway.  If no new gateway is given, we keep the gateway as is.
693 	 */
694 	if (gateway != NULL && (flags & RTF_GATEWAY)) {
695 		if (flags & RTF_IPV6)
696 			ip6_addr_copy_to_packed(route->re_gw6,
697 			    *ip_2_ip6(gateway));
698 		else
699 			ip4_addr_copy(route->re_gw4, *ip_2_ip4(gateway));
700 	}
701 
702 	/* We have made routing changes. */
703 	route_updated(tree);
704 
705 	/* Announce the route change. */
706 	rtsock_msg_route(route, RTM_CHANGE, rtr);
707 
708 	return OK;
709 }
710 
711 /*
712  * Delete the given route, and announce its deletion.
713  */
714 void
route_delete(struct route_entry * route,const struct rtsock_request * rtr)715 route_delete(struct route_entry * route, const struct rtsock_request * rtr)
716 {
717 	unsigned int tree;
718 
719 	/* First announce the deletion, while the route is still around. */
720 	tree = (route->re_flags & RTF_IPV6) ? ROUTE_TREE_V6 : ROUTE_TREE_V4;
721 
722 	rtsock_msg_route(route, RTM_DELETE, rtr);
723 
724 	/* Then actually delete the route. */
725 	rttree_delete(&route_tree[tree], &route->re_entry);
726 
727 	SIMPLEQ_INSERT_HEAD(&route_freelist, route, re_next);
728 
729 	/* We have made routing changes. */
730 	route_updated(tree);
731 }
732 
733 /*
734  * Delete all routes associated with the given interface, typically as part of
735  * destroying the interface.
736  */
737 void
route_clear(struct ifdev * ifdev)738 route_clear(struct ifdev * ifdev)
739 {
740 	struct rttree_entry *entry, *parent;
741 	struct route_entry *route;
742 	unsigned int tree;
743 
744 	/*
745 	 * Delete all routes associated with the given interface.  Fortunately,
746 	 * we need not also delete addresses zoned to the given interface,
747 	 * because no route can be created with a zone ID that does not match
748 	 * the associated interface.  That is the main reason why we ignore
749 	 * zone IDs for gateways when adding or changing routes..
750 	 */
751 	for (tree = 0; tree < NR_ROUTE_TREE; tree++) {
752 		parent = NULL;
753 
754 		while ((entry = rttree_enum(&route_tree[tree],
755 		    parent)) != NULL) {
756 			route = (struct route_entry *)entry;
757 
758 			if (route->re_ifdev == ifdev)
759 				route_delete(route, NULL /*request*/);
760 			else
761 				parent = entry;
762 		}
763 	}
764 }
765 
766 /*
767  * Process a routing command specifically for an IPv4 or IPv6 route, as one of
768  * the specific continuations of processing started by route_process().  The
769  * RTM_ routing command is given as 'type'.  The route destination is given as
770  * 'dst_addr'; its address type determines whether the operation is for IPv4 or
771  * IPv6.  The sockaddr structures for 'mask' and 'gateway' are passed on as is
772  * and may have to be parsed here if not NULL.  'ifdev' is the interface to be
773  * associated with a route; it is non-NULL only if an interface name (IFP) or
774  * address (IFA) was given.  The RTF_ flags field 'flags' has been checked
775  * against the globally supported flags, but may have to be checked for flags
776  * that do not apply to IPv4/IPv6 routes.  Return OK or a negative error code,
777  * following the same semantics as route_process().
778  */
779 static int
route_process_inet(unsigned int type,const ip_addr_t * dst_addr,const struct sockaddr * mask,const struct sockaddr * gateway,struct ifdev * ifdev,unsigned int flags,const struct rtsock_request * rtr)780 route_process_inet(unsigned int type, const ip_addr_t * dst_addr,
781 	const struct sockaddr * mask, const struct sockaddr * gateway,
782 	struct ifdev * ifdev, unsigned int flags,
783 	const struct rtsock_request * rtr)
784 {
785 	struct route_entry *route;
786 	ip_addr_t gw_storage, *gw_addr;
787 	struct ifdev *ifdev2;
788 	uint32_t zone;
789 	unsigned int prefix;
790 	int r;
791 
792 	assert(!(flags & RTF_LLDATA));
793 
794 	if ((flags & (RTF_DYNAMIC | RTF_MODIFIED | RTF_DONE | RTF_XRESOLVE |
795 	    RTF_LLINFO | RTF_CLONED | RTF_SRC | RTF_ANNOUNCE |
796 	    RTF_BROADCAST)) != 0)
797 		return EINVAL;
798 
799 	/*
800 	 * For network entries, a network mask must be provided in all cases.
801 	 * For host entries, the network mask is ignored, and we use a prefix
802 	 * with all bits set.
803 	 */
804 	if (!(flags & RTF_HOST)) {
805 		if (mask == NULL)
806 			return EINVAL;
807 
808 		if ((r = addr_get_netmask(mask, mask->sa_len,
809 		    IP_GET_TYPE(dst_addr), &prefix, NULL /*ipaddr*/)) != OK)
810 			return r;
811 	} else {
812 		if (IP_IS_V4(dst_addr))
813 			prefix = IP4_BITS;
814 		else
815 			prefix = IP6_BITS;
816 	}
817 
818 	gw_addr = NULL;
819 
820 	/*
821 	 * Determine the gateway and interface for the routing entry, if
822 	 * applicable.
823 	 */
824 	if (type == RTM_ADD || type == RTM_CHANGE) {
825 		/*
826 		 * The RTF_UP flag must always be set, but only if the flags
827 		 * field is used at all.
828 		 */
829 		if (!(flags & RTF_UP))
830 			return EINVAL;
831 
832 		if ((flags & RTF_GATEWAY) && gateway != NULL) {
833 			if ((r = addr_get_inet(gateway, gateway->sa_len,
834 			    IP_GET_TYPE(dst_addr), &gw_storage, TRUE /*kame*/,
835 			    NULL /*port*/)) != OK)
836 				return r;
837 
838 			gw_addr = &gw_storage;
839 
840 			/*
841 			 * We use the zone of the gateway to help determine the
842 			 * interface, but we do not reject a mismatching zone
843 			 * here.  The reason for this is that we do not want
844 			 * routes that have zones for an interface other than
845 			 * the one associated with the route, as that could
846 			 * create a world of trouble: packets leaving their
847 			 * zone, complications with cleaning up interfaces..
848 			 */
849 			if (IP_IS_V6(gw_addr) &&
850 			    ip6_addr_has_zone(ip_2_ip6(gw_addr))) {
851 				zone = ip6_addr_zone(ip_2_ip6(gw_addr));
852 
853 				ifdev2 = ifdev_get_by_index(zone);
854 
855 				if (ifdev != NULL && ifdev != ifdev2)
856 					return EINVAL;
857 				else
858 					ifdev = ifdev2;
859 			}
860 
861 			/*
862 			 * If we still have no interface at this point, see if
863 			 * we can find one based on just the gateway address.
864 			 * See if a locally attached network owns the address.
865 			 * That may not succeed, leaving ifdev set to NULL.
866 			 */
867 			if (ifdev == NULL)
868 				ifdev = ifaddr_map_by_subnet(gw_addr);
869 		}
870 
871 		/*
872 		 * When adding routes, all necessary information must be given.
873 		 * When changing routes, we can leave some settings as is.
874 		 */
875 		if (type == RTM_ADD) {
876 			if ((flags & RTF_GATEWAY) && gw_addr == NULL)
877 				return EINVAL;
878 
879 			/* TODO: try harder to find a matching interface.. */
880 			if (ifdev == NULL)
881 				return ENETUNREACH;
882 		}
883 	}
884 
885 	/*
886 	 * All route commands except RTM_ADD require that a route exists for
887 	 * the given identity, although RTM_GET, when requesting a host entry,
888 	 * may return a wider (network) route based on just the destination
889 	 * address.
890 	 */
891 	if (type != RTM_ADD) {
892 		/* For RTM_GET (only), a host query may return a net route. */
893 		if (type == RTM_GET && (flags & RTF_HOST))
894 			route = route_lookup(dst_addr);
895 		else
896 			route = route_find(dst_addr, prefix,
897 			    !!(flags & RTF_HOST));
898 
899 		if (route == NULL)
900 			return ESRCH;
901 	} else
902 		route = NULL;
903 
904 	/* Process the actual routing command. */
905 	switch (type) {
906 	case RTM_ADD:
907 		return route_add(dst_addr, prefix, gw_addr, ifdev, flags, rtr);
908 
909 	case RTM_CHANGE:
910 		/* Routes for local addresses are immutable. */
911 		if (route_is_immutable(route))
912 			return EPERM;
913 
914 		return route_change(route, gw_addr, ifdev, flags, rtr);
915 
916 	case RTM_DELETE:
917 		/* Routes for local addresses are immutable. */
918 		if (route_is_immutable(route))
919 			return EPERM;
920 
921 		route_delete(route, rtr);
922 
923 		return OK;
924 
925 	case RTM_LOCK:
926 		/*
927 		 * TODO: implement even the suggestion that we support this.
928 		 * For now, we do not keep per-route metrics, let alone change
929 		 * them dynamically ourselves, so "locking" metrics is really
930 		 * not a concept that applies to us.  We may however have to
931 		 * save the lock mask and return it in queries..
932 		 */
933 		/* FALLTHROUGH */
934 	case RTM_GET:
935 		/* Simply generate a message for the route we just found. */
936 		rtsock_msg_route(route, type, rtr);
937 
938 		return OK;
939 
940 	default:
941 		return EINVAL;
942 	}
943 }
944 
945 /*
946  * Process a routing command from a routing socket.  The RTM_ type of command
947  * is given as 'type', and is one of RTM_ADD, RTM_CHANGE, RTM_DELETE, RTM_GET,
948  * RTM_LOCK.  In addition, the function takes a set of sockaddr pointers as
949  * provided by the routing command.  Each of these sockaddr pointers may be
950  * NULL; if not NULL, the structure is at least large enough to contain the
951  * address length (sa_len) and family (sa_family), and the length never exceeds
952  * the amount of memory used to store the sockaddr structure.  However, the
953  * length itself has not yet been checked against the expected protocol
954  * structure and could even be zero.  The command's RTF_ routing flags and
955  * metrics are provided as well.  On success, return OK, in which case the
956  * caller assumes that a routing socket announcement for the processed command
957  * has been sent already (passing on 'rtr' to the announcement function as is).
958  * On failure, return a negative error code; in that case, the caller will send
959  * a failure response on the original routing socket itself.
960  */
961 int
route_process(unsigned int type,const struct sockaddr * dst,const struct sockaddr * mask,const struct sockaddr * gateway,const struct sockaddr * ifp,const struct sockaddr * ifa,unsigned int flags,unsigned long inits,const struct rt_metrics * rmx,const struct rtsock_request * rtr)962 route_process(unsigned int type, const struct sockaddr * dst,
963 	const struct sockaddr * mask, const struct sockaddr * gateway,
964 	const struct sockaddr * ifp, const struct sockaddr * ifa,
965 	unsigned int flags, unsigned long inits,
966 	const struct rt_metrics * rmx, const struct rtsock_request * rtr)
967 {
968 	struct ifdev *ifdev, *ifdev2;
969 	char name[IFNAMSIZ];
970 	ip_addr_t dst_addr, if_addr;
971 	uint32_t zone;
972 	uint8_t addr_type;
973 	int r;
974 
975 	/*
976 	 * The identity of a route is determined by its destination address,
977 	 * destination zone, prefix length, and whether it is a host entry
978 	 * or not.  If it is a host entry (RTF_HOST is set), the prefix length
979 	 * is implied by the protocol; otherwise it should be obtained from the
980 	 * given netmask if necessary.  For link-local addresses, the zone ID
981 	 * must be embedded KAME-style in the destination address.  A
982 	 * destination address must always be given.  The destination address
983 	 * also determines the overall address family.
984 	 */
985 	if (dst == NULL)
986 		return EINVAL;
987 
988 	switch (dst->sa_family) {
989 	case AF_INET:
990 		addr_type = IPADDR_TYPE_V4;
991 		break;
992 #ifdef INET6
993 	case AF_INET6:
994 		addr_type = IPADDR_TYPE_V6;
995 		break;
996 #endif /* INET6 */
997 	default:
998 		return EAFNOSUPPORT;
999 	}
1000 
1001 	if ((r = addr_get_inet(dst, dst->sa_len, addr_type, &dst_addr,
1002 	    TRUE /*kame*/, NULL /*port*/)) != OK)
1003 		return r;
1004 
1005 	/*
1006 	 * Perform a generic test on the given flags.  This covers everything
1007 	 * we support at all, plus a few flags we ignore.  Specific route types
1008 	 * may have further restrictions; those tests are performed later.
1009 	 */
1010 	if ((flags & ~(RTF_UP | RTF_GATEWAY | RTF_HOST | RTF_REJECT |
1011 	    RTF_CLONING | RTF_LLINFO | RTF_LLDATA | RTF_STATIC |
1012 	    RTF_BLACKHOLE | RTF_CLONED | RTF_PROTO2 | RTF_PROTO1)) != 0)
1013 		return EINVAL;
1014 
1015 	ifdev = NULL;
1016 
1017 	if (type == RTM_ADD || type == RTM_CHANGE) {
1018 		/*
1019 		 * If an interface address or name is given, use that to
1020 		 * identify the target interface.  If both are given, make sure
1021 		 * that both identify the same interface--a hopefully helpful
1022 		 * feature to detect wrong route(8) usage (NetBSD simply takes
1023 		 * IFP over IFA).  An empty interface name is ignored on the
1024 		 * basis that libc link_addr(3) is broken.
1025 		 */
1026 		if (ifp != NULL) {
1027 			if ((r = addr_get_link(ifp, ifp->sa_len, name,
1028 			    sizeof(name), NULL /*hwaddr*/,
1029 			    0 /*hwaddr_len*/)) != OK)
1030 				return r;
1031 
1032 			if (name[0] != '\0' &&
1033 			    (ifdev = ifdev_find_by_name(name)) == NULL)
1034 				return ENXIO;
1035 		}
1036 
1037 		if (ifa != NULL) {
1038 			/*
1039 			 * This is similar to retrieval of source addresses in
1040 			 * ipsock, with the difference that we do not impose
1041 			 * that a zone ID be given for link-local addresses.
1042 			 */
1043 			if ((r = addr_get_inet(ifa, ifa->sa_len, addr_type,
1044 			    &if_addr, TRUE /*kame*/, NULL /*port*/)) != OK)
1045 				return r;
1046 
1047 			if ((ifdev2 = ifaddr_map_by_addr(&if_addr)) == NULL)
1048 				return EADDRNOTAVAIL;
1049 
1050 			if (ifdev != NULL && ifdev != ifdev2)
1051 				return EINVAL;
1052 			else
1053 				ifdev = ifdev2;
1054 		}
1055 
1056 		/*
1057 		 * If the destination address has a zone, then it must not
1058 		 * conflict with the interface, if one was given.  If not, we
1059 		 * may use it to decide the interface to use for the route.
1060 		 */
1061 		if (IP_IS_V6(&dst_addr) &&
1062 		    ip6_addr_has_zone(ip_2_ip6(&dst_addr))) {
1063 			if (ifdev == NULL) {
1064 				zone = ip6_addr_zone(ip_2_ip6(&dst_addr));
1065 
1066 				ifdev = ifdev_get_by_index(zone);
1067 			} else {
1068 				if (!ip6_addr_test_zone(ip_2_ip6(&dst_addr),
1069 				    ifdev_get_netif(ifdev)))
1070 					return EADDRNOTAVAIL;
1071 			}
1072 		}
1073 	}
1074 
1075 	/*
1076 	 * For now, no initializers are supported by any of the sub-processing
1077 	 * routines, so outright reject requests that set any initializers.
1078 	 * Most importantly, we do not support per-route MTU settings (RTV_MTU)
1079 	 * because lwIP would not use them, and we do not support non-zero
1080 	 * expiry (RTV_EXPIRE) because for IPv4/IPv6 routes it is not a widely
1081 	 * used feature and for ARP/NDP we would have to change lwIP.
1082 	 * dhcpcd(8) does supply RTV_MTU, we have to ignore that option rather
1083 	 * than reject it, unfortunately.  arp(8) always sets RTV_EXPIRE, so we
1084 	 * reject only non-zero expiry there.
1085 	 */
1086 	if ((inits & ~(RTV_EXPIRE | RTV_MTU)) != 0 ||
1087 	    ((inits & RTV_EXPIRE) != 0 && rmx->rmx_expire != 0))
1088 		return ENOSYS;
1089 
1090 	/*
1091 	 * From here on, the processing differs for ARP, NDP, and IP routes.
1092 	 * As of writing, our userland is from NetBSD 7, which puts link-local
1093 	 * route entries in its main route tables.  This means we would have to
1094 	 * search for existing routes before we can determine whether, say, a
1095 	 * RTM_GET request is for an IP or an ARP route entry.  As of NetBSD 8,
1096 	 * the link-local administration is separated, and all requests use the
1097 	 * RTF_LLDATA flag to indicate that they are for ARP/NDP routes rather
1098 	 * than IP routes.  Since that change makes things much cleaner for us,
1099 	 * we borrow from the future, patching arp(8) and ndp(8) to add the
1100 	 * RTF_LLDATA flag now, so that we can implement a clean split here.
1101 	 */
1102 	if (!(flags & RTF_LLDATA))
1103 		return route_process_inet(type, &dst_addr, mask, gateway,
1104 		    ifdev, flags, rtr);
1105 	else
1106 		return lldata_process(type, &dst_addr, gateway, ifdev, flags,
1107 		    rtr);
1108 }
1109 
1110 /*
1111  * Return the routing flags (RTF_) for the given routing entry.  Strip out any
1112  * internal flags.
1113  */
1114 unsigned int
route_get_flags(const struct route_entry * route)1115 route_get_flags(const struct route_entry * route)
1116 {
1117 
1118 	return route->re_flags & ~RTF_IPV6;
1119 }
1120 
1121 /*
1122  * Return TRUE if the given routing entry is for the IPv6 address family, or
1123  * FALSE if it is for IPv4.
1124  */
1125 int
route_is_ipv6(const struct route_entry * route)1126 route_is_ipv6(const struct route_entry * route)
1127 {
1128 
1129 	return !!(route->re_flags & RTF_IPV6);
1130 }
1131 
1132 /*
1133  * Return the interface associated with the given routing entry.  The resulting
1134  * interface is never NULL.
1135  */
1136 struct ifdev *
route_get_ifdev(const struct route_entry * route)1137 route_get_ifdev(const struct route_entry * route)
1138 {
1139 
1140 	return route->re_ifdev;
1141 }
1142 
1143 /*
1144  * Convert the given raw routing address pointed to by 'rtaddr' into a
1145  * lwIP-style IP address 'ipaddr' of type 'type', which must by IPADDR_TYPE_V4
1146  * or IPADDR_TYPE_V6.
1147  */
1148 static void
route_get_addr(ip_addr_t * ipaddr,const uint8_t * rtaddr,uint8_t type)1149 route_get_addr(ip_addr_t * ipaddr, const uint8_t * rtaddr, uint8_t type)
1150 {
1151 	ip6_addr_t *ip6addr;
1152 	uint32_t val, zone;
1153 
1154 	/*
1155 	 * Convert the routing address to a lwIP-type IP address.  Take out the
1156 	 * KAME-style embedded zone, if needed.
1157 	 */
1158 	memset(ipaddr, 0, sizeof(*ipaddr));
1159 	IP_SET_TYPE(ipaddr, type);
1160 
1161 	switch (type) {
1162 	case IPADDR_TYPE_V4:
1163 		memcpy(&val, rtaddr, sizeof(val));
1164 
1165 		ip_addr_set_ip4_u32(ipaddr, val);
1166 
1167 		break;
1168 
1169 	case IPADDR_TYPE_V6:
1170 		ip6addr = ip_2_ip6(ipaddr);
1171 
1172 		memcpy(ip6addr->addr, rtaddr, sizeof(ip6addr->addr));
1173 
1174 		if (ip6_addr_has_scope(ip6addr, IP6_UNKNOWN)) {
1175 			zone = ntohl(ip6addr->addr[0]) & 0x0000ffffU;
1176 
1177 			ip6addr->addr[0] &= PP_HTONL(0xffff0000U);
1178 
1179 			ip6_addr_set_zone(ip6addr, zone);
1180 		}
1181 
1182 		break;
1183 
1184 	default:
1185 		panic("unknown IP address type: %u", type);
1186 	}
1187 }
1188 
1189 /*
1190  * Obtain information about an IPv4 or IPv6 routing entry, by filling 'addr',
1191  * 'mask', 'gateway', and optionally (if not NULL) 'ifp' and 'ifa' with
1192  * sockaddr-type data for each of those fields.  Also store the associated
1193  * interface in 'ifdevp', the routing entry's flags in 'flags', and the route's
1194  * usage count in 'use'.
1195  */
1196 void
route_get(const struct route_entry * route,union sockaddr_any * addr,union sockaddr_any * mask,union sockaddr_any * gateway,union sockaddr_any * ifp,union sockaddr_any * ifa,struct ifdev ** ifdevp,unsigned int * flags,unsigned int * use)1197 route_get(const struct route_entry * route, union sockaddr_any * addr,
1198 	union sockaddr_any * mask, union sockaddr_any * gateway,
1199 	union sockaddr_any * ifp, union sockaddr_any * ifa,
1200 	struct ifdev ** ifdevp, unsigned int * flags, unsigned int * use)
1201 {
1202 	const ip_addr_t *src_addr;
1203 	ip_addr_t dst_addr, gw_addr;
1204 	struct ifdev *ifdev;
1205 	socklen_t addr_len;
1206 	uint8_t type;
1207 
1208 	type = (route->re_flags & RTF_IPV6) ? IPADDR_TYPE_V6 : IPADDR_TYPE_V4;
1209 
1210 	/* Get the destination address. */
1211 	route_get_addr(&dst_addr, route->re_addr, type);
1212 
1213 	addr_len = sizeof(*addr);
1214 
1215 	addr_put_inet(&addr->sa, &addr_len, &dst_addr, TRUE /*kame*/,
1216 	    0 /*port*/);
1217 
1218 	/* Get the network mask, if applicable. */
1219 	if (!(route->re_flags & RTF_HOST)) {
1220 		addr_len = sizeof(*mask);
1221 
1222 		addr_put_netmask(&mask->sa, &addr_len, type,
1223 		    rttree_get_prefix(&route->re_entry));
1224 	} else
1225 		mask->sa.sa_len = 0;
1226 
1227 	/* Get the gateway, which may be an IP address or a local link. */
1228 	addr_len = sizeof(*gateway);
1229 
1230 	ifdev = route->re_ifdev;
1231 
1232 	if (route->re_flags & RTF_GATEWAY) {
1233 		if (type == IPADDR_TYPE_V4)
1234 			ip_addr_copy_from_ip4(gw_addr, route->re_gw4);
1235 		else
1236 			ip_addr_copy_from_ip6_packed(gw_addr, route->re_gw6);
1237 
1238 		addr_put_inet(&gateway->sa, &addr_len, &gw_addr, TRUE /*kame*/,
1239 		    0 /*port*/);
1240 	} else {
1241 		addr_put_link(&gateway->sa, &addr_len, ifdev_get_index(ifdev),
1242 		    ifdev_get_iftype(ifdev), NULL /*name*/, NULL /*hwaddr*/,
1243 		    0 /*hwaddr_len*/);
1244 	}
1245 
1246 	/* Get the associated interface name. */
1247 	if (ifp != NULL) {
1248 		addr_len = sizeof(*ifp);
1249 
1250 		addr_put_link(&ifp->sa, &addr_len, ifdev_get_index(ifdev),
1251 		    ifdev_get_iftype(ifdev), ifdev_get_name(ifdev),
1252 		    NULL /*hwaddr*/, 0 /*hwaddr_len*/);
1253 	}
1254 
1255 	/* Get the associated source address, if we can determine one. */
1256 	if (ifa != NULL) {
1257 		src_addr = ifaddr_select(&dst_addr, ifdev, NULL /*ifdevp*/);
1258 
1259 		if (src_addr != NULL) {
1260 			addr_len = sizeof(*ifa);
1261 
1262 			addr_put_inet(&ifa->sa, &addr_len, src_addr,
1263 			    TRUE /*kame*/, 0 /*port*/);
1264 		} else
1265 			ifa->sa.sa_len = 0;
1266 	}
1267 
1268 	/* Get other fields. */
1269 	*flags = route_get_flags(route);	/* strip any internal flags */
1270 	*ifdevp = ifdev;
1271 	*use = route->re_use;
1272 }
1273 
1274 /*
1275  * Enumerate IPv4 routing entries.  Return the first IPv4 routing entry if
1276  * 'last' is NULL, or the next routing entry after 'last' if it is not NULL.
1277  * In both cases, the return value may be NULL if there are no more routes.
1278  */
1279 struct route_entry *
route_enum_v4(struct route_entry * last)1280 route_enum_v4(struct route_entry * last)
1281 {
1282 
1283 	assert(last == NULL || !(last->re_flags & RTF_IPV6));
1284 
1285 	return (struct route_entry *)rttree_enum(&route_tree[ROUTE_TREE_V4],
1286 	    (last != NULL) ? &last->re_entry : NULL);
1287 }
1288 
1289 /*
1290  * Enumerate IPv6 routing entries.  Return the first IPv6 routing entry if
1291  * 'last' is NULL, or the next routing entry after 'last' if it is not NULL.
1292  * In both cases, the return value may be NULL if there are no more routes.
1293  */
1294 struct route_entry *
route_enum_v6(struct route_entry * last)1295 route_enum_v6(struct route_entry * last)
1296 {
1297 
1298 	assert(last == NULL || (last->re_flags & RTF_IPV6));
1299 
1300 	return (struct route_entry *)rttree_enum(&route_tree[ROUTE_TREE_V6],
1301 	    (last != NULL) ? &last->re_entry : NULL);
1302 }
1303 
1304 /*
1305  * lwIP IPv4 routing function.   Given an IPv4 destination address, look up and
1306  * return the target interface, or NULL if there is no route to the address.
1307  *
1308  * This is a full replacement of the corresponding lwIP function, which should
1309  * be overridden with weak symbols, using patches against the lwIP source code.
1310  * As such, the lwIP headers should already provide the correct prototype for
1311  * this function.  If not, something will have changed in the lwIP
1312  * implementation, and this code must be revised accordingly.
1313  */
1314 struct netif *
ip4_route(const ip4_addr_t * dst)1315 ip4_route(const ip4_addr_t * dst)
1316 {
1317 	struct route_entry *route;
1318 	struct ifdev *ifdev;
1319 
1320 	/*
1321 	 * Look up the route for the destination IPv4 address.  If no route is
1322 	 * found at all, return NULL to the caller.
1323 	 */
1324 	if ((route = route_lookup_v4(dst)) == NULL) {
1325 		route_miss_v4(dst);
1326 
1327 		return NULL;
1328 	}
1329 
1330 	/*
1331 	 * For now, we increase the use counter only for actual route lookups,
1332 	 * and not for gateway lookups or user queries.  As of writing,
1333 	 * route(8) does not print this number anyway..
1334 	 */
1335 	route->re_use++;
1336 
1337 	/*
1338 	 * For all packets that are supposed to be rejected or blackholed, use
1339 	 * a loopback interface, regardless of the interface to which the route
1340 	 * is associated (even though it will typically be lo0 anyway).  The
1341 	 * reason for this is that on packet output, we perform another route
1342 	 * route lookup just to check for rejection/blackholing, but for
1343 	 * efficiency reasons, we limit such checks to loopback interfaces:
1344 	 * loopback traffic will typically use only one IP address anyway, thus
1345 	 * limiting route misses from such rejection/blackhole route lookups as
1346 	 * much as we can.  The lookup is implemented in route_output_v4().  We
1347 	 * divert only if the target interface is not a loopback interface
1348 	 * already, mainly to allow userland tests to create blackhole routes
1349 	 * to a specific loopback interface for testing purposes.
1350 	 *
1351 	 * It is not correct to return NULL for RTF_REJECT routes here, because
1352 	 * this could cause e.g. connect() calls to fail immediately, which is
1353 	 * not how rejection should work.  Related: a previous incarnation of
1354 	 * support for these flags used a dedicated netif to eliminate the
1355 	 * extra route lookup on regular output altogether, but in the current
1356 	 * situation, that netif would have to be assigned (IPv4 and IPv6)
1357 	 * addresses in order not to break e.g. connect() in the same way.
1358 	 */
1359 	if ((route->re_flags & (RTF_REJECT | RTF_BLACKHOLE)) &&
1360 	    !ifdev_is_loopback(route->re_ifdev))
1361 		ifdev = ifdev_get_loopback();
1362 	else
1363 		ifdev = route->re_ifdev;
1364 
1365 	return ifdev_get_netif(ifdev);
1366 }
1367 
1368 /*
1369  * lwIP IPv4 routing hook.  Since this hook is called only from lwIP's own
1370  * ip4_route() implementation, this hook must never fire.  If it does, either
1371  * something is wrong with overriding ip4_route(), or lwIP added other places
1372  * from which this hook is called.  Both cases are highly problematic and must
1373  * be resolved somehow, which is why we simply call panic() here.
1374  */
1375 struct netif *
lwip_hook_ip4_route(const ip4_addr_t * dst)1376 lwip_hook_ip4_route(const ip4_addr_t * dst)
1377 {
1378 
1379 	panic("IPv4 routing hook called - this should not happen!");
1380 }
1381 
1382 /*
1383  * lwIP IPv4 ARP gateway hook.
1384  */
1385 const ip4_addr_t *
lwip_hook_etharp_get_gw(struct netif * netif,const ip4_addr_t * ip4addr)1386 lwip_hook_etharp_get_gw(struct netif * netif, const ip4_addr_t * ip4addr)
1387 {
1388 	static ip4_addr_t gw_addr; /* may be returned to the caller */
1389 	struct route_entry *route;
1390 
1391 	/* Look up the route for the destination IP address. */
1392 	if ((route = route_lookup_v4(ip4addr)) == NULL)
1393 		return NULL;
1394 
1395 	/*
1396 	 * This case could only ever trigger as a result of lwIP taking its own
1397 	 * routing decisions instead of calling the IPv4 routing hook.  While
1398 	 * not impossible, such cases should be extremely rare.  We cannot
1399 	 * provide a meaningful gateway address in this case either, though.
1400 	 */
1401 	if (route->re_ifdev != netif_get_ifdev(netif)) {
1402 		printf("LWIP: unexpected interface for gateway lookup\n");
1403 
1404 		return NULL;
1405 	}
1406 
1407 	/*
1408 	 * If this route has a gateway, return the IP address of the gateway.
1409 	 * Otherwise, the route is for a local network, and we would typically
1410 	 * not get here because lwIP performs the local-network check itself.
1411 	 * It is possible that the local network consists of more than one IP
1412 	 * range, and the user has configured a route for the other range.  In
1413 	 * that case, return the IP address of the actual destination.
1414 	 *
1415 	 * We store a packed version of the IPv4 address, so reconstruct the
1416 	 * unpacked version to a static variable first - for consistency with
1417 	 * the IPv6 code.
1418 	 */
1419 	if (route->re_flags & RTF_GATEWAY) {
1420 		ip4_addr_copy(gw_addr, route->re_gw4);
1421 
1422 		return &gw_addr;
1423 	} else
1424 		return ip4addr;
1425 }
1426 
1427 /*
1428  * lwIP IPv6 routing function.   Given an IPv6 source and destination address,
1429  * look up and return the target interface, or NULL if there is no route to the
1430  * address.  Our routing algorithm is destination-based, meaning that the
1431  * source address must be considered only to resolve zone ambiguity.
1432  *
1433  * This is a full replacement of the corresponding lwIP function, which should
1434  * be overridden with weak symbols, using patches against the lwIP source code.
1435  * As such, the lwIP headers should already provide the correct prototype for
1436  * this function.  If not, something will have changed in the lwIP
1437  * implementation, and this code must be revised accordingly.
1438  */
1439 struct netif *
ip6_route(const ip6_addr_t * src,const ip6_addr_t * dst)1440 ip6_route(const ip6_addr_t * src, const ip6_addr_t * dst)
1441 {
1442 	struct route_entry *route;
1443 	struct ifdev *ifdev;
1444 	ip6_addr_t dst_addr;
1445 	uint32_t zone;
1446 
1447 	assert(src != NULL);
1448 	assert(dst != NULL);
1449 
1450 	/*
1451 	 * If the destination address is scoped but has no zone, use the source
1452 	 * address to determine a zone, which we then set on the destination
1453 	 * address to find the route, if successful.  Obviously, the interface
1454 	 * is not going to be different from the zone, but we do need to check
1455 	 * other aspects of the route (e.g., one might want to null-route all
1456 	 * multicast traffic).  In the case that no source address is given at
1457 	 * all, first see if the destination address happens to be a locally
1458 	 * assigned address.  In theory this could yield multiple matches, so
1459 	 * pick the first one.  If not even that helps, we have absolutely
1460 	 * nothing we can use to refine route selection.  We could pick an
1461 	 * arbitrary interface in that case, but we currently don't.
1462 	 */
1463 	zone = IP6_NO_ZONE;
1464 
1465 	if (ip6_addr_lacks_zone(dst, IP6_UNKNOWN)) {
1466 		if (ip6_addr_has_zone(src))
1467 			zone = ip6_addr_zone(src);
1468 		else if (!ip6_addr_isany(src)) {
1469 			if ((ifdev = ifaddr_v6_map_by_addr(src)) == NULL)
1470 				return NULL; /* should never happen */
1471 			zone = ifdev_get_index(ifdev);
1472 		} else {
1473 			if ((ifdev = ifaddr_v6_map_by_addr(dst)) != NULL)
1474 				zone = ifdev_get_index(ifdev);
1475 			else
1476 				return NULL; /* TODO: try harder */
1477 		}
1478 
1479 		if (zone != IP6_NO_ZONE) {
1480 			dst_addr = *dst;
1481 
1482 			ip6_addr_set_zone(&dst_addr, zone);
1483 
1484 			dst = &dst_addr;
1485 		}
1486 	}
1487 
1488 	route = route_lookup_v6(dst);
1489 
1490 	/*
1491 	 * Look up the route for the destination IPv6 address.  If no route is
1492 	 * found at all, return NULL to the caller.
1493 	 */
1494 	if (route == NULL) {
1495 		/*
1496 		 * Since we rely on userland to create routes for on-link
1497 		 * prefixes and default routers, we do not have to call lwIP's
1498 		 * nd6_find_route() here.
1499 		 */
1500 
1501 		/* Generate an RTM_MISS message. */
1502 		route_miss_v6(dst);
1503 
1504 		return NULL;
1505 	}
1506 
1507 	/*
1508 	 * We have found a route based on the destination address.  If we did
1509 	 * not pick the destination address zone based on the source address,
1510 	 * we should now check for source address zone violations.  Note that
1511 	 * if even the destination address zone violates its target interface,
1512 	 * this case will be caught by route_lookup_v6().
1513 	 */
1514 	if (zone == IP6_NO_ZONE &&
1515 	    ifaddr_is_zone_mismatch(src, route->re_ifdev))
1516 		return NULL;
1517 
1518 	route->re_use++;
1519 
1520 	/*
1521 	 * See ip4_route() for an explanation of the use of loopback here.  For
1522 	 * the IPv6 case, the matching logic is in route_output_v6().
1523 	 */
1524 	if ((route->re_flags & (RTF_REJECT | RTF_BLACKHOLE)) &&
1525 	    !ifdev_is_loopback(route->re_ifdev))
1526 		ifdev = ifdev_get_loopback();
1527 	else
1528 		ifdev = route->re_ifdev;
1529 
1530 	/*
1531 	 * If the selected interface would cause the destination address to
1532 	 * leave its zone, fail route selection altogether.  This case may
1533 	 * trigger especially for reject routes, for which the interface change
1534 	 * to loopback may introduce a zone violation.
1535 	 */
1536 	if (ip6_addr_has_zone(dst) &&
1537 	    !ip6_addr_test_zone(dst, ifdev_get_netif(ifdev)))
1538 		return NULL;
1539 
1540 	return ifdev_get_netif(ifdev);
1541 }
1542 
1543 /*
1544  * lwIP IPv6 (source) routing hook.  Since this hook is called only from lwIP's
1545  * own ip6_route() implementation, this hook must never fire.  If it does,
1546  * either something is wrong with overriding ip6_route(), or lwIP added other
1547  * places from which this hook is called.  Both cases are highly problematic
1548  * and must be resolved somehow, which is why we simply call panic() here.
1549  */
1550 struct netif *
lwip_hook_ip6_route(const ip6_addr_t * src,const ip6_addr_t * dst)1551 lwip_hook_ip6_route(const ip6_addr_t * src, const ip6_addr_t * dst)
1552 {
1553 
1554 	panic("IPv6 routing hook called - this should not happen!");
1555 }
1556 
1557 /*
1558  * lwIP IPv6 ND6 gateway hook.
1559  */
1560 const ip6_addr_t *
lwip_hook_nd6_get_gw(struct netif * netif,const ip6_addr_t * ip6addr)1561 lwip_hook_nd6_get_gw(struct netif * netif, const ip6_addr_t * ip6addr)
1562 {
1563 	static ip6_addr_t gw_addr; /* may be returned to the caller */
1564 	struct route_entry *route;
1565 	struct ifdev *ifdev;
1566 
1567 	ifdev = netif_get_ifdev(netif);
1568 	assert(ifdev != NULL);
1569 
1570 	/* Look up the route for the destination IP address. */
1571 	if ((route = route_lookup_v6(ip6addr)) == NULL)
1572 		return NULL;
1573 
1574 	/* As for IPv4. */
1575 	if (route->re_ifdev != ifdev) {
1576 		printf("LWIP: unexpected interface for gateway lookup\n");
1577 
1578 		return NULL;
1579 	}
1580 
1581 	/*
1582 	 * We save memory by storing a packed (zoneless) version of the IPv6
1583 	 * gateway address.  That means we cannot return a pointer to it here.
1584 	 * Instead, we have to resort to expanding the address into a static
1585 	 * variable.  The caller will immediately make a copy anyway, though.
1586 	 */
1587 	if (route->re_flags & RTF_GATEWAY) {
1588 		ip6_addr_copy_from_packed(gw_addr, route->re_gw6);
1589 		ip6_addr_assign_zone(&gw_addr, IP6_UNKNOWN, netif);
1590 
1591 		return &gw_addr;
1592 	} else
1593 		return ip6addr;
1594 }
1595 
1596 /*
1597  * Check whether a packet is allowed to be sent to the given destination IPv4
1598  * address 'ipaddr' on the interface 'ifdev', according to route information.
1599  * Return TRUE if the packet should be sent.  Return FALSE if the packet should
1600  * be rejected or discarded, with 'err' set to the error to return to lwIP.
1601  */
1602 int
route_output_v4(struct ifdev * ifdev,const ip4_addr_t * ipaddr,err_t * err)1603 route_output_v4(struct ifdev * ifdev, const ip4_addr_t * ipaddr, err_t * err)
1604 {
1605 	const struct route_entry *route;
1606 
1607 	/* See if we should reject/blackhole packets to this destination. */
1608 	if (ifdev_is_loopback(ifdev) &&
1609 	    (route = route_lookup_v4(ipaddr)) != NULL &&
1610 	    (route->re_flags & (RTF_REJECT | RTF_BLACKHOLE))) {
1611 		if (route->re_flags & RTF_REJECT)
1612 			*err = ERR_RTE;
1613 		else
1614 			*err = ERR_OK;
1615 
1616 		return FALSE;
1617 	}
1618 
1619 	return TRUE;
1620 }
1621 
1622 /*
1623  * Check whether a packet is allowed to be sent to the given destination IPv6
1624  * address 'ipaddr' on the interface 'ifdev', according to route information.
1625  * Return TRUE if the packet should be sent.  Return FALSE if the packet should
1626  * be rejected or discarded, with 'err' set to the error to return to lwIP.
1627  */
1628 int
route_output_v6(struct ifdev * ifdev,const ip6_addr_t * ipaddr,err_t * err)1629 route_output_v6(struct ifdev * ifdev, const ip6_addr_t * ipaddr, err_t * err)
1630 {
1631 	const struct route_entry *route;
1632 
1633 	/* Do one more zone violation test, just in case.  It's cheap. */
1634 	if (ip6_addr_has_zone(ipaddr) &&
1635 	    !ip6_addr_test_zone(ipaddr, ifdev_get_netif(ifdev))) {
1636 		*err = ERR_RTE;
1637 
1638 		return FALSE;
1639 	}
1640 
1641 	/* See if we should reject/blackhole packets to this destination. */
1642 	if (ifdev_is_loopback(ifdev) &&
1643 	    (route = route_lookup_v6(ipaddr)) != NULL &&
1644 	    (route->re_flags & (RTF_REJECT | RTF_BLACKHOLE))) {
1645 		if (route->re_flags & RTF_REJECT)
1646 			*err = ERR_RTE;
1647 		else
1648 			*err = ERR_OK;
1649 
1650 		return FALSE;
1651 	}
1652 
1653 	return TRUE;
1654 }
1655