/* LWIP service - route.c - route management */
/*
 * This module provides a destination-based routing implementation, roughly
 * matching the routing as done traditionally by the BSDs and by current NetBSD
 * in particular. As such, this implementation almost completely replaces
 * lwIP's own more limited (and less rigid) routing algorithms. It does this
 * using a combination of overriding lwIP functions (ip4_route, ip6_route) with
 * weak-symbol patching, and lwIP-provided gateway hooks. Especially the
 * former gives us a level of control that lwIP's routing hooks do not provide:
 * not only does such overriding give us the ability to flag that no route was
 * found at all, we also bypass a number of default decisions taken by lwIP
 * where the routing hooks are not called at all.
 *
 * As a result, the routing tables as visible to the user are an almost
 * completely accurate reflection of the routing decisions taken by this TCP/IP
 * stack in practice. There is currently only one exception: for IPv4 gateway
 * selection, lwIP will bypass the gateway hook if the given address is on the
 * local subnet according to the locally assigned IP address and subnet mask.
 * This exception should practically affect no one, though.
 *
 * Our routing implementation differs from NetBSD's in various aspects, though.
 * Perhaps the most important one, also noted elsewhere, is that we do not
 * support the coexistence of an all-bits-set network route and a host route
 * for the same IP address. If necessary, this issue can be resolved.
 *
 * We use a custom concept of "immutable" routes for local addresses, which are
 * a somewhat special case as explained in the ifaddr module. Since those
 * RTF_LOCAL routes cannot be deleted, a small change is made to the route(8)
 * flush-all command to skip them. Packets directed at local addresses on
 * non-loopback interfaces are handled in a way that differs from NetBSD's,
 * too. This is explained in the ifdev module.
 *
 * The BSDs support special routes that reject or blackhole packets, based on
 * routing flags. We support such routes as well, but implement them somewhat
 * differently from the BSDs: such packets always get routed over a loopback
 * interface (regardless of their associated interface), in order to save on
 * routing lookups for packets in the common case.
 *
 * As general rules of thumb: if there is no route to a destination, assignment
 * of a local address will already fail with a "no route to host" error. If
 * there is an RTF_REJECT route, a local address will be assigned, but actual
 * packets will be routed to a loopback interface and result in a "no route to
 * host" error upon reception there - this is what NetBSD seems to do too, even
 * though the documentation says that RTF_REJECT routes generate ICMP messages
 * instead. RTF_BLACKHOLE behaves similarly to RTF_REJECT, except that the
 * packet is simply discarded upon receipt by the loopback interface.
 *
 * In various places, both here and elsewhere, we check to make sure that on
 * routing and output, scoped IPv6 source and destination addresses never leave
 * their zone. For example, a packet must not be sent to an outgoing interface
 * if its source address is a link-local address with a zone for another
 * interface. lwIP does not check for such violations, and so we must make
 * sure that this does not happen ourselves.
 *
 * Normally, one would tell lwIP to use a particular default IPv4 gateway by
 * associating the gateway address to a particular interface, and then setting
 * that interface as default interface (netif_default). We explicitly do
 * neither of these things. Instead, the routing hooks should return the
 * default route whenever applicable, and the gateway hooks should return the
 * default route's gateway IP address whenever needed.
61 * 62 * Due to lwIP's limited set of error codes, we do not properly distinguish 63 * between cases where EHOSTUNREACH or ENETUNREACH should be thrown, and throw 64 * the former in most cases. 65 */ 66 67 #include "lwip.h" 68 #include "ifaddr.h" 69 #include "rttree.h" 70 #include "rtsock.h" 71 #include "route.h" 72 #include "lldata.h" 73 74 #include "lwip/nd6.h" 75 76 /* 77 * The maximum number of uint8_t bytes needed to represent a routing address. 78 * This value is the maximum of 4 (for IPv4) and 16 (for IPv6). 79 */ 80 #define ROUTE_ADDR_MAX (MAX(IP4_BITS, IP6_BITS) / NBBY) 81 82 /* 83 * We use a shared routing entry data structure for IPv4 and IPv6 routing 84 * entries. The result is cleaner code at the cost of (currently) about 2.3KB 85 * of memory wasted (costing 12 bytes per address for three addresses for 64 of 86 * the 128 routing entries that would be for IPv4), although with the benefit 87 * that either address family may use more than half of the routing entries. 88 * From that 2.3KB, 1KB can be reclaimed by moving the destination address and 89 * mask into the rttree_entry data structure, at the cost of its generality. 90 */ 91 struct route_entry { 92 struct rttree_entry re_entry; /* routing tree entry */ 93 union pxfer_re_pu { 94 struct ifdev *repu_ifdev; /* associated interface */ 95 SIMPLEQ_ENTRY(route_entry) repu_next; /* next free pointer */ 96 } re_pu; 97 unsigned int re_flags; /* routing flags (RTF_) */ 98 unsigned int re_use; /* number of times used */ 99 uint8_t re_addr[ROUTE_ADDR_MAX]; /* destination address */ 100 uint8_t re_mask[ROUTE_ADDR_MAX]; /* destination mask */ 101 union ixfer_re_gu { 102 ip4_addr_p_t regu_gw4; /* gateway (IPv4) */ 103 ip6_addr_p_t regu_gw6; /* gateway (IPv6) */ 104 } re_gu; 105 }; 106 #define re_ifdev re_pu.repu_ifdev 107 #define re_next re_pu.repu_next 108 #define re_gw4 re_gu.regu_gw4 109 #define re_gw6 re_gu.regu_gw6 110 111 /* Routes for local addresses are immutable, for reasons explained in ifdev. 
*/ 112 #define route_is_immutable(route) ((route)->re_flags & RTF_LOCAL) 113 114 /* 115 * We override a subset of the BSD routing flags in order to store our own 116 * local settings. In particular, we have to have a way to store whether a 117 * route is for an IPv4 or IPv6 destination address. We override BSD's 118 * RTF_DONE flag for this: RTF_DONE is only used with routing sockets, and 119 * never associated with actual routes. In contrast, RTF_IPV6 is only used 120 * with actual routes, and never sent across routing sockets. In general, 121 * overriding flags is preferable to adding new ones, as BSD might later add 122 * more flags itself as well, while it can never remove existing flags. 123 */ 124 #define RTF_IPV6 RTF_DONE /* route is for an IPv6 destination */ 125 126 /* The total number of routing entries (IPv4 and IPv6 combined). */ 127 #define NR_ROUTE_ENTRY 128 128 129 static struct route_entry route_array[NR_ROUTE_ENTRY]; /* routing entries */ 130 131 static SIMPLEQ_HEAD(, route_entry) route_freelist; /* free entry list */ 132 133 /* The routing trees. There are two: one for IPv4 and one for IPv6. */ 134 #define ROUTE_TREE_V4 0 135 #define ROUTE_TREE_V6 1 136 #define NR_ROUTE_TREE 2 137 138 static struct rttree route_tree[NR_ROUTE_TREE]; 139 140 /* We support a single cached routing entry per address family (IPv4, IPv6). */ 141 static int rtcache_v4set; 142 static ip4_addr_t rtcache_v4addr; 143 static struct route_entry *rtcache_v4route; 144 145 static int rtcache_v6set; 146 static ip6_addr_t rtcache_v6addr; 147 static struct route_entry *rtcache_v6route; 148 149 /* 150 * Initialize the routing cache. There are a lot of trivial functions here, 151 * but this is designed to be extended in the future. 152 */ 153 static void 154 rtcache_init(void) 155 { 156 157 rtcache_v4set = FALSE; 158 rtcache_v6set = FALSE; 159 } 160 161 /* 162 * Look up the given IPv4 address in the routing cache. 
If there is a match, 163 * return TRUE with the associated route in 'route', possibly NULL if a 164 * negative result was cached. Return FALSE if the routing cache does not 165 * cache the given IPv4 address. 166 */ 167 static inline int 168 rtcache_lookup_v4(const ip4_addr_t * ipaddr, struct route_entry ** route) 169 { 170 171 if (rtcache_v4set && ip4_addr_cmp(&rtcache_v4addr, ipaddr)) { 172 *route = rtcache_v4route; 173 174 return TRUE; 175 } else 176 return FALSE; 177 } 178 179 /* 180 * Add the given IPv4 address and the given routing entry (NULL for negative 181 * caching) to the routing cache. 182 */ 183 static inline void 184 rtcache_add_v4(const ip4_addr_t * ipaddr, struct route_entry * route) 185 { 186 187 rtcache_v4addr = *ipaddr; 188 rtcache_v4route = route; 189 rtcache_v4set = TRUE; 190 } 191 192 /* 193 * Reset the IPv4 routing cache. 194 */ 195 static void 196 rtcache_reset_v4(void) 197 { 198 199 rtcache_v4set = FALSE; 200 } 201 202 /* 203 * Look up the given IPv6 address in the routing cache. If there is a match, 204 * return TRUE with the associated route in 'route', possibly NULL if a 205 * negative result was cached. Return FALSE if the routing cache does not 206 * cache the given IPv6 address. 207 */ 208 static inline int 209 rtcache_lookup_v6(const ip6_addr_t * ipaddr, struct route_entry ** route) 210 { 211 212 if (rtcache_v6set && ip6_addr_cmp(&rtcache_v6addr, ipaddr)) { 213 *route = rtcache_v6route; 214 215 return TRUE; 216 } else 217 return FALSE; 218 } 219 220 /* 221 * Add the given IPv6 address and the given routing entry (NULL for negative 222 * caching) to the routing cache. Caching of scoped addresses without zones is 223 * not supported. 224 */ 225 static inline void 226 rtcache_add_v6(const ip6_addr_t * ipaddr, struct route_entry * route) 227 { 228 229 rtcache_v6addr = *ipaddr; 230 rtcache_v6route = route; 231 rtcache_v6set = TRUE; 232 } 233 234 /* 235 * Reset the IPv6 routing cache. 
236 */ 237 static void 238 rtcache_reset_v6(void) 239 { 240 241 rtcache_v6set = FALSE; 242 } 243 244 /* 245 * Initialize the routing module. 246 */ 247 void 248 route_init(void) 249 { 250 unsigned int slot; 251 252 /* Initialize the routing trees. */ 253 rttree_init(&route_tree[ROUTE_TREE_V4], IP4_BITS); 254 rttree_init(&route_tree[ROUTE_TREE_V6], IP6_BITS); 255 256 /* Initialize the list of free routing entries. */ 257 SIMPLEQ_INIT(&route_freelist); 258 259 for (slot = 0; slot < __arraycount(route_array); slot++) 260 SIMPLEQ_INSERT_TAIL(&route_freelist, &route_array[slot], 261 re_next); 262 263 /* Reset the routing cache. */ 264 rtcache_init(); 265 } 266 267 /* 268 * Prepare for a routing tree operation by converting the given IPv4 address 269 * into a raw address that can be used in that routing tree operation. 270 */ 271 static inline void 272 route_prepare_v4(const ip4_addr_t * ip4addr, uint8_t rtaddr[ROUTE_ADDR_MAX]) 273 { 274 uint32_t val; 275 276 val = ip4_addr_get_u32(ip4addr); 277 278 memcpy(rtaddr, &val, sizeof(val)); 279 } 280 281 /* 282 * Prepare for a routing tree operation by converting the given IPv6 address 283 * into a raw address that can be used in that routing tree operation. If the 284 * given prefix length allows for it, also incorporate the address zone. 285 */ 286 static inline void 287 route_prepare_v6(const ip6_addr_t * ip6addr, unsigned int prefix, 288 uint8_t rtaddr[ROUTE_ADDR_MAX]) 289 { 290 291 assert(sizeof(ip6addr->addr) == IP6_BITS / NBBY); 292 293 /* 294 * TODO: in most cases, we could actually return a pointer to the 295 * address contained in the given lwIP IP address structure. However, 296 * doing so would make a lot things quite a bit messier around here, 297 * but the small performance gain may still make it worth it. 298 */ 299 memcpy(rtaddr, ip6addr->addr, sizeof(ip6addr->addr)); 300 301 /* 302 * Embed the zone ID into the address, KAME style. 
This is the 303 * easiest way to have link-local addresses for multiple interfaces 304 * coexist in a single routing tree. Do this only if the full zone ID 305 * would be included in the prefix though, or we might de-normalize the 306 * address. 307 */ 308 if (ip6_addr_has_zone(ip6addr) && prefix >= 32) 309 rtaddr[3] = ip6_addr_zone(ip6addr); 310 } 311 312 /* 313 * Prepare for a routing tree operation by converting the given IP address into 314 * a raw address that can be used in that routing tree operation. The given 315 * address's zone ID is embedded "KAME-style" into the raw (IPv6) address when 316 * applicable and if the given prefix length allows for it. Return the index 317 * of the routing tree to use (ROUTE_TREE_V4 or ROUTE_TREE_V6). 318 */ 319 static unsigned int 320 route_prepare(const ip_addr_t * ipaddr, unsigned int prefix, 321 uint8_t rtaddr[ROUTE_ADDR_MAX]) 322 { 323 324 switch (IP_GET_TYPE(ipaddr)) { 325 case IPADDR_TYPE_V4: 326 route_prepare_v4(ip_2_ip4(ipaddr), rtaddr); 327 328 return ROUTE_TREE_V4; 329 330 case IPADDR_TYPE_V6: 331 route_prepare_v6(ip_2_ip6(ipaddr), prefix, rtaddr); 332 333 return ROUTE_TREE_V6; 334 335 default: 336 panic("unknown IP address type: %u", IP_GET_TYPE(ipaddr)); 337 } 338 } 339 340 /* 341 * The given routing tree (ROUTE_TREE_V4 or ROUTE_TREE_V6) has been updated. 342 * Invalidate any cache entries that may now have become stale, both locally 343 * and in lwIP. 344 */ 345 static void 346 route_updated(unsigned int tree) 347 { 348 349 if (tree == ROUTE_TREE_V6) { 350 rtcache_reset_v6(); 351 352 /* 353 * Also clear the lwIP ND6 destination cache, which may now 354 * contain entries for the wrong gateway. 355 */ 356 nd6_clear_destination_cache(); 357 } else 358 rtcache_reset_v4(); 359 } 360 361 /* 362 * Add a route to the appropriate routing table. The address, address zone, 363 * prefix, and RTF_HOST flag in the flags field make up the identity of the 364 * route. 
If the flags field contains RTF_GATEWAY, a gateway must be given; 365 * otherwise, it must be NULL. The route is associated with the given 366 * interface, which may not be NULL. The caller must ensure that the flags 367 * field does not contain unsupported flags. On success, return OK, and also 368 * also announce the addition. On failure, return a negative error code. 369 */ 370 int 371 route_add(const ip_addr_t * addr, unsigned int prefix, 372 const ip_addr_t * gateway, struct ifdev * ifdev, unsigned int flags, 373 const struct rtsock_request * rtr) 374 { 375 struct route_entry *route; 376 unsigned int tree, byte; 377 int r; 378 379 assert(flags & RTF_UP); 380 assert(!!(flags & RTF_GATEWAY) == (gateway != NULL)); 381 assert(ifdev != NULL); 382 383 /* Get a routing entry, if any are available. */ 384 if (SIMPLEQ_EMPTY(&route_freelist)) 385 return ENOBUFS; 386 387 route = SIMPLEQ_FIRST(&route_freelist); 388 389 /* 390 * Perform sanity checks on the input, and fill in enough of the 391 * routing entry to be able to try and add it to the routing tree. 392 */ 393 memset(route->re_addr, 0, sizeof(route->re_addr)); 394 395 tree = route_prepare(addr, prefix, route->re_addr); 396 397 switch (tree) { 398 case ROUTE_TREE_V4: 399 if (prefix > IP4_BITS || 400 (prefix != IP4_BITS && (flags & RTF_HOST))) 401 return EINVAL; 402 403 flags &= ~RTF_IPV6; 404 405 break; 406 407 case ROUTE_TREE_V6: 408 if (prefix > IP6_BITS || 409 (prefix != IP6_BITS && (flags & RTF_HOST))) 410 return EINVAL; 411 412 flags |= RTF_IPV6; 413 414 break; 415 416 default: 417 return EINVAL; 418 } 419 420 /* Generate the (raw) network mask. This is protocol agnostic! */ 421 addr_make_netmask(route->re_mask, sizeof(route->re_mask), prefix); 422 423 /* The given address must be normalized to its mask. */ 424 for (byte = 0; byte < __arraycount(route->re_addr); byte++) 425 if ((route->re_addr[byte] & ~route->re_mask[byte]) != 0) 426 return EINVAL; 427 428 /* 429 * Attempt to add the routing entry. 
Host-type entries do not have an 430 * associated mask, enabling ever-so-slightly faster matching. 431 */ 432 if ((r = rttree_add(&route_tree[tree], &route->re_entry, 433 route->re_addr, (flags & RTF_HOST) ? NULL : route->re_mask, 434 prefix)) != OK) 435 return r; 436 437 /* 438 * Success. Finish the routing entry. Remove the entry from the free 439 * list before assigning re_ifdev, as these two use the same memory. 440 */ 441 SIMPLEQ_REMOVE_HEAD(&route_freelist, re_next); 442 443 route->re_ifdev = ifdev; 444 route->re_flags = flags; 445 446 /* 447 * Store the gateway if one is given. Store the address in lwIP format 448 * because that is the easiest way use it later again. Store it as a 449 * union to keep the route entry structure as small as possible. Store 450 * the address without its zone, because the gateway's address zone is 451 * implied by its associated ifdev. 452 * 453 * If no gateway is given, this is a link-type route, i.e., a route for 454 * a local network, with all nodes directly connected and reachable. 455 */ 456 if (flags & RTF_GATEWAY) { 457 if (flags & RTF_IPV6) 458 ip6_addr_copy_to_packed(route->re_gw6, 459 *ip_2_ip6(gateway)); 460 else 461 ip4_addr_copy(route->re_gw4, *ip_2_ip4(gateway)); 462 } 463 464 /* We have made routing changes. */ 465 route_updated(tree); 466 467 /* Announce the route addition. */ 468 rtsock_msg_route(route, RTM_ADD, rtr); 469 470 return OK; 471 } 472 473 /* 474 * Check whether it is possible to add a route for the given destination to the 475 * corresponding routing table, that is, a subsequent route_add() call for this 476 * destination address is guaranteed to succeed (if all its parameters are 477 * valid). Return TRUE if adding the route is guaranteed to succeed, or FALSE 478 * if creating a route for the given destination would fail. 
479 */ 480 int 481 route_can_add(const ip_addr_t * addr, unsigned int prefix, 482 int is_host __unused) 483 { 484 uint8_t rtaddr[ROUTE_ADDR_MAX]; 485 unsigned int tree; 486 487 tree = route_prepare(addr, prefix, rtaddr); 488 489 /* 490 * The corresponding routing tree must not already contain an exact 491 * match for the destination. If the routing tree implementation is 492 * ever extended with support for coexisting host and net entries with 493 * the same prefix, we should also pass in 'is_host' here. 494 */ 495 if (rttree_lookup_exact(&route_tree[tree], rtaddr, prefix) != NULL) 496 return FALSE; 497 498 /* There must be a routing entry on the free list as well. */ 499 return !SIMPLEQ_EMPTY(&route_freelist); 500 } 501 502 /* 503 * Find a route with the exact given route identity. Return the route if 504 * found, or NULL if no route exists with this identity. 505 */ 506 struct route_entry * 507 route_find(const ip_addr_t * addr, unsigned int prefix, int is_host) 508 { 509 struct rttree_entry *entry; 510 struct route_entry *route; 511 uint8_t rtaddr[ROUTE_ADDR_MAX]; 512 unsigned int tree; 513 514 tree = route_prepare(addr, prefix, rtaddr); 515 516 entry = rttree_lookup_exact(&route_tree[tree], rtaddr, prefix); 517 if (entry == NULL) 518 return NULL; 519 520 route = (struct route_entry *)entry; 521 522 /* 523 * As long as the routing tree code does not support coexisting host 524 * and net entries with the same prefix, we have to check the type. 525 */ 526 if (!!(route->re_flags & RTF_HOST) != is_host) 527 return NULL; 528 529 return route; 530 } 531 532 /* 533 * A route lookup failed for the given IP address. Generate an RTM_MISS 534 * message on routing sockets. 
535 */ 536 static void 537 route_miss(const ip_addr_t * ipaddr) 538 { 539 union sockaddr_any addr; 540 socklen_t addr_len; 541 542 addr_len = sizeof(addr); 543 544 addr_put_inet(&addr.sa, &addr_len, ipaddr, TRUE /*kame*/, 0 /*port*/); 545 546 rtsock_msg_miss(&addr.sa); 547 } 548 549 /* 550 * A route lookup failed for the given IPv4 address. Generate an RTM_MISS 551 * message on routing sockets. 552 */ 553 static void 554 route_miss_v4(const ip4_addr_t * ip4addr) 555 { 556 ip_addr_t ipaddr; 557 558 ip_addr_copy_from_ip4(ipaddr, *ip4addr); 559 560 route_miss(&ipaddr); 561 } 562 563 /* 564 * A route lookup failed for the given IPv6 address. Generate an RTM_MISS 565 * message on routing sockets. 566 */ 567 static void 568 route_miss_v6(const ip6_addr_t * ip6addr) 569 { 570 ip_addr_t ipaddr; 571 572 ip_addr_copy_from_ip6(ipaddr, *ip6addr); 573 574 route_miss(&ipaddr); 575 } 576 577 /* 578 * Look up the most narrow matching routing entry for the given IPv4 address. 579 * Return the routing entry if one exists at all, or NULL otherwise. This 580 * function performs caching. 581 */ 582 static inline struct route_entry * 583 route_lookup_v4(const ip4_addr_t * ip4addr) 584 { 585 uint8_t rtaddr[ROUTE_ADDR_MAX]; 586 struct route_entry *route; 587 588 /* 589 * Look up the route for the destination IP address, unless we have a 590 * cached route entry. We cache negatives in order to avoid generating 591 * lots of RTM_MISS messages for the same destination in a row. 592 */ 593 if (rtcache_lookup_v4(ip4addr, &route)) 594 return route; 595 596 route_prepare_v4(ip4addr, rtaddr); 597 598 route = (struct route_entry *) 599 rttree_lookup_match(&route_tree[ROUTE_TREE_V4], rtaddr); 600 601 /* Cache the result, even if we found no route. */ 602 rtcache_add_v4(ip4addr, route); 603 604 return route; 605 } 606 607 /* 608 * Look up the most narrow matching routing entry for the given IPv6 address, 609 * taking into account its zone ID if applicable. 
Return the routing entry if 610 * one exists at all, or NULL otherwise. This function performs caching. 611 */ 612 static inline struct route_entry * 613 route_lookup_v6(const ip6_addr_t * ip6addr) 614 { 615 uint8_t rtaddr[ROUTE_ADDR_MAX]; 616 struct route_entry *route; 617 int use_cache; 618 619 /* 620 * We do not support caching of addresses that should have a zone but 621 * do not: in different contexts, such addresses could yield different 622 * routes. 623 */ 624 use_cache = !ip6_addr_lacks_zone(ip6addr, IP6_UNKNOWN); 625 626 if (use_cache && rtcache_lookup_v6(ip6addr, &route)) 627 return route; 628 629 route_prepare_v6(ip6addr, IP6_BITS, rtaddr); 630 631 route = (struct route_entry *) 632 rttree_lookup_match(&route_tree[ROUTE_TREE_V6], rtaddr); 633 634 /* Cache the result, even if no route was found. */ 635 if (use_cache) 636 rtcache_add_v6(ip6addr, route); 637 638 return route; 639 } 640 641 /* 642 * Look up the most narrow matching routing entry for the given IP address, 643 * taking into account its zone ID if applicable. Return the routing entry if 644 * one exists at all, or NULL otherwise. This function performs caching. 645 */ 646 struct route_entry * 647 route_lookup(const ip_addr_t * addr) 648 { 649 650 if (IP_IS_V4(addr)) 651 return route_lookup_v4(ip_2_ip4(addr)); 652 else 653 return route_lookup_v6(ip_2_ip6(addr)); 654 } 655 656 /* 657 * Change an existing routing entry. Its flags are always updated to the new 658 * set of given flags, although certain flags are always preserved. If the 659 * new flags set has RTF_GATEWAY set and 'gateway' is not NULL, update the 660 * gateway associated with the route. If 'ifdev' is not NULL, reassociate the 661 * route with the given interface; this will not affect the zone of the 662 * route's destination address. On success, return OK, and also announce the 663 * change. On failure, return a negative error code. 
664 */ 665 static int 666 route_change(struct route_entry * route, const ip_addr_t * gateway, 667 struct ifdev * ifdev, unsigned int flags, 668 const struct rtsock_request * rtr) 669 { 670 unsigned int tree, preserve; 671 672 tree = (route->re_flags & RTF_IPV6) ? ROUTE_TREE_V6 : ROUTE_TREE_V4; 673 674 /* Update the associated interface (only) if a new one is given. */ 675 if (ifdev != NULL) 676 route->re_ifdev = ifdev; 677 678 /* 679 * These flags may not be changed. RTF_UP should always be set anyway. 680 * RTF_HOST and RTF_IPV6 are part of the route's identity. RTF_LOCAL 681 * should be preserved as well, although we will not get here if either 682 * the old or the new flags have it set anyway. 683 */ 684 preserve = RTF_UP | RTF_HOST | RTF_IPV6 | RTF_LOCAL; 685 686 /* Always update the flags. There is no way not to. */ 687 route->re_flags = (route->re_flags & preserve) | (flags & ~preserve); 688 689 /* 690 * If a new gateway is given *and* RTF_GATEWAY is set, update the 691 * gateway. If RTF_GATEWAY is not set, this is a link-type route with 692 * no gateway. If no new gateway is given, we keep the gateway as is. 693 */ 694 if (gateway != NULL && (flags & RTF_GATEWAY)) { 695 if (flags & RTF_IPV6) 696 ip6_addr_copy_to_packed(route->re_gw6, 697 *ip_2_ip6(gateway)); 698 else 699 ip4_addr_copy(route->re_gw4, *ip_2_ip4(gateway)); 700 } 701 702 /* We have made routing changes. */ 703 route_updated(tree); 704 705 /* Announce the route change. */ 706 rtsock_msg_route(route, RTM_CHANGE, rtr); 707 708 return OK; 709 } 710 711 /* 712 * Delete the given route, and announce its deletion. 713 */ 714 void 715 route_delete(struct route_entry * route, const struct rtsock_request * rtr) 716 { 717 unsigned int tree; 718 719 /* First announce the deletion, while the route is still around. */ 720 tree = (route->re_flags & RTF_IPV6) ? ROUTE_TREE_V6 : ROUTE_TREE_V4; 721 722 rtsock_msg_route(route, RTM_DELETE, rtr); 723 724 /* Then actually delete the route. 
*/ 725 rttree_delete(&route_tree[tree], &route->re_entry); 726 727 SIMPLEQ_INSERT_HEAD(&route_freelist, route, re_next); 728 729 /* We have made routing changes. */ 730 route_updated(tree); 731 } 732 733 /* 734 * Delete all routes associated with the given interface, typically as part of 735 * destroying the interface. 736 */ 737 void 738 route_clear(struct ifdev * ifdev) 739 { 740 struct rttree_entry *entry, *parent; 741 struct route_entry *route; 742 unsigned int tree; 743 744 /* 745 * Delete all routes associated with the given interface. Fortunately, 746 * we need not also delete addresses zoned to the given interface, 747 * because no route can be created with a zone ID that does not match 748 * the associated interface. That is the main reason why we ignore 749 * zone IDs for gateways when adding or changing routes.. 750 */ 751 for (tree = 0; tree < NR_ROUTE_TREE; tree++) { 752 parent = NULL; 753 754 while ((entry = rttree_enum(&route_tree[tree], 755 parent)) != NULL) { 756 route = (struct route_entry *)entry; 757 758 if (route->re_ifdev == ifdev) 759 route_delete(route, NULL /*request*/); 760 else 761 parent = entry; 762 } 763 } 764 } 765 766 /* 767 * Process a routing command specifically for an IPv4 or IPv6 route, as one of 768 * the specific continuations of processing started by route_process(). The 769 * RTM_ routing command is given as 'type'. The route destination is given as 770 * 'dst_addr'; its address type determines whether the operation is for IPv4 or 771 * IPv6. The sockaddr structures for 'mask' and 'gateway' are passed on as is 772 * and may have to be parsed here if not NULL. 'ifdev' is the interface to be 773 * associated with a route; it is non-NULL only if an interface name (IFP) or 774 * address (IFA) was given. The RTF_ flags field 'flags' has been checked 775 * against the globally supported flags, but may have to be checked for flags 776 * that do not apply to IPv4/IPv6 routes. 
Return OK or a negative error code, 777 * following the same semantics as route_process(). 778 */ 779 static int 780 route_process_inet(unsigned int type, const ip_addr_t * dst_addr, 781 const struct sockaddr * mask, const struct sockaddr * gateway, 782 struct ifdev * ifdev, unsigned int flags, 783 const struct rtsock_request * rtr) 784 { 785 struct route_entry *route; 786 ip_addr_t gw_storage, *gw_addr; 787 struct ifdev *ifdev2; 788 uint32_t zone; 789 unsigned int prefix; 790 int r; 791 792 assert(!(flags & RTF_LLDATA)); 793 794 if ((flags & (RTF_DYNAMIC | RTF_MODIFIED | RTF_DONE | RTF_XRESOLVE | 795 RTF_LLINFO | RTF_CLONED | RTF_SRC | RTF_ANNOUNCE | 796 RTF_BROADCAST)) != 0) 797 return EINVAL; 798 799 /* 800 * For network entries, a network mask must be provided in all cases. 801 * For host entries, the network mask is ignored, and we use a prefix 802 * with all bits set. 803 */ 804 if (!(flags & RTF_HOST)) { 805 if (mask == NULL) 806 return EINVAL; 807 808 if ((r = addr_get_netmask(mask, mask->sa_len, 809 IP_GET_TYPE(dst_addr), &prefix, NULL /*ipaddr*/)) != OK) 810 return r; 811 } else { 812 if (IP_IS_V4(dst_addr)) 813 prefix = IP4_BITS; 814 else 815 prefix = IP6_BITS; 816 } 817 818 gw_addr = NULL; 819 820 /* 821 * Determine the gateway and interface for the routing entry, if 822 * applicable. 823 */ 824 if (type == RTM_ADD || type == RTM_CHANGE) { 825 /* 826 * The RTF_UP flag must always be set, but only if the flags 827 * field is used at all. 828 */ 829 if (!(flags & RTF_UP)) 830 return EINVAL; 831 832 if ((flags & RTF_GATEWAY) && gateway != NULL) { 833 if ((r = addr_get_inet(gateway, gateway->sa_len, 834 IP_GET_TYPE(dst_addr), &gw_storage, TRUE /*kame*/, 835 NULL /*port*/)) != OK) 836 return r; 837 838 gw_addr = &gw_storage; 839 840 /* 841 * We use the zone of the gateway to help determine the 842 * interface, but we do not reject a mismatching zone 843 * here. 
The reason for this is that we do not want 844 * routes that have zones for an interface other than 845 * the one associated with the route, as that could 846 * create a world of trouble: packets leaving their 847 * zone, complications with cleaning up interfaces.. 848 */ 849 if (IP_IS_V6(gw_addr) && 850 ip6_addr_has_zone(ip_2_ip6(gw_addr))) { 851 zone = ip6_addr_zone(ip_2_ip6(gw_addr)); 852 853 ifdev2 = ifdev_get_by_index(zone); 854 855 if (ifdev != NULL && ifdev != ifdev2) 856 return EINVAL; 857 else 858 ifdev = ifdev2; 859 } 860 861 /* 862 * If we still have no interface at this point, see if 863 * we can find one based on just the gateway address. 864 * See if a locally attached network owns the address. 865 * That may not succeed, leaving ifdev set to NULL. 866 */ 867 if (ifdev == NULL) 868 ifdev = ifaddr_map_by_subnet(gw_addr); 869 } 870 871 /* 872 * When adding routes, all necessary information must be given. 873 * When changing routes, we can leave some settings as is. 874 */ 875 if (type == RTM_ADD) { 876 if ((flags & RTF_GATEWAY) && gw_addr == NULL) 877 return EINVAL; 878 879 /* TODO: try harder to find a matching interface.. */ 880 if (ifdev == NULL) 881 return ENETUNREACH; 882 } 883 } 884 885 /* 886 * All route commands except RTM_ADD require that a route exists for 887 * the given identity, although RTM_GET, when requesting a host entry, 888 * may return a wider (network) route based on just the destination 889 * address. 890 */ 891 if (type != RTM_ADD) { 892 /* For RTM_GET (only), a host query may return a net route. */ 893 if (type == RTM_GET && (flags & RTF_HOST)) 894 route = route_lookup(dst_addr); 895 else 896 route = route_find(dst_addr, prefix, 897 !!(flags & RTF_HOST)); 898 899 if (route == NULL) 900 return ESRCH; 901 } else 902 route = NULL; 903 904 /* Process the actual routing command. 
*/ 905 switch (type) { 906 case RTM_ADD: 907 return route_add(dst_addr, prefix, gw_addr, ifdev, flags, rtr); 908 909 case RTM_CHANGE: 910 /* Routes for local addresses are immutable. */ 911 if (route_is_immutable(route)) 912 return EPERM; 913 914 return route_change(route, gw_addr, ifdev, flags, rtr); 915 916 case RTM_DELETE: 917 /* Routes for local addresses are immutable. */ 918 if (route_is_immutable(route)) 919 return EPERM; 920 921 route_delete(route, rtr); 922 923 return OK; 924 925 case RTM_LOCK: 926 /* 927 * TODO: implement even the suggestion that we support this. 928 * For now, we do not keep per-route metrics, let alone change 929 * them dynamically ourselves, so "locking" metrics is really 930 * not a concept that applies to us. We may however have to 931 * save the lock mask and return it in queries.. 932 */ 933 /* FALLTHROUGH */ 934 case RTM_GET: 935 /* Simply generate a message for the route we just found. */ 936 rtsock_msg_route(route, type, rtr); 937 938 return OK; 939 940 default: 941 return EINVAL; 942 } 943 } 944 945 /* 946 * Process a routing command from a routing socket. The RTM_ type of command 947 * is given as 'type', and is one of RTM_ADD, RTM_CHANGE, RTM_DELETE, RTM_GET, 948 * RTM_LOCK. In addition, the function takes a set of sockaddr pointers as 949 * provided by the routing command. Each of these sockaddr pointers may be 950 * NULL; if not NULL, the structure is at least large enough to contain the 951 * address length (sa_len) and family (sa_family), and the length never exceeds 952 * the amount of memory used to store the sockaddr structure. However, the 953 * length itself has not yet been checked against the expected protocol 954 * structure and could even be zero. The command's RTF_ routing flags and 955 * metrics are provided as well. 
On success, return OK, in which case the 956 * caller assumes that a routing socket announcement for the processed command 957 * has been sent already (passing on 'rtr' to the announcement function as is). 958 * On failure, return a negative error code; in that case, the caller will send 959 * a failure response on the original routing socket itself. 960 */ 961 int 962 route_process(unsigned int type, const struct sockaddr * dst, 963 const struct sockaddr * mask, const struct sockaddr * gateway, 964 const struct sockaddr * ifp, const struct sockaddr * ifa, 965 unsigned int flags, unsigned long inits, 966 const struct rt_metrics * rmx, const struct rtsock_request * rtr) 967 { 968 struct ifdev *ifdev, *ifdev2; 969 char name[IFNAMSIZ]; 970 ip_addr_t dst_addr, if_addr; 971 uint32_t zone; 972 uint8_t addr_type; 973 int r; 974 975 /* 976 * The identity of a route is determined by its destination address, 977 * destination zone, prefix length, and whether it is a host entry 978 * or not. If it is a host entry (RTF_HOST is set), the prefix length 979 * is implied by the protocol; otherwise it should be obtained from the 980 * given netmask if necessary. For link-local addresses, the zone ID 981 * must be embedded KAME-style in the destination address. A 982 * destination address must always be given. The destination address 983 * also determines the overall address family. 984 */ 985 if (dst == NULL) 986 return EINVAL; 987 988 switch (dst->sa_family) { 989 case AF_INET: 990 addr_type = IPADDR_TYPE_V4; 991 break; 992 #ifdef INET6 993 case AF_INET6: 994 addr_type = IPADDR_TYPE_V6; 995 break; 996 #endif /* INET6 */ 997 default: 998 return EAFNOSUPPORT; 999 } 1000 1001 if ((r = addr_get_inet(dst, dst->sa_len, addr_type, &dst_addr, 1002 TRUE /*kame*/, NULL /*port*/)) != OK) 1003 return r; 1004 1005 /* 1006 * Perform a generic test on the given flags. This covers everything 1007 * we support at all, plus a few flags we ignore. 
Specific route types 1008 * may have further restrictions; those tests are performed later. 1009 */ 1010 if ((flags & ~(RTF_UP | RTF_GATEWAY | RTF_HOST | RTF_REJECT | 1011 RTF_CLONING | RTF_LLINFO | RTF_LLDATA | RTF_STATIC | 1012 RTF_BLACKHOLE | RTF_CLONED | RTF_PROTO2 | RTF_PROTO1)) != 0) 1013 return EINVAL; 1014 1015 ifdev = NULL; 1016 1017 if (type == RTM_ADD || type == RTM_CHANGE) { 1018 /* 1019 * If an interface address or name is given, use that to 1020 * identify the target interface. If both are given, make sure 1021 * that both identify the same interface--a hopefully helpful 1022 * feature to detect wrong route(8) usage (NetBSD simply takes 1023 * IFP over IFA). An empty interface name is ignored on the 1024 * basis that libc link_addr(3) is broken. 1025 */ 1026 if (ifp != NULL) { 1027 if ((r = addr_get_link(ifp, ifp->sa_len, name, 1028 sizeof(name), NULL /*hwaddr*/, 1029 0 /*hwaddr_len*/)) != OK) 1030 return r; 1031 1032 if (name[0] != '\0' && 1033 (ifdev = ifdev_find_by_name(name)) == NULL) 1034 return ENXIO; 1035 } 1036 1037 if (ifa != NULL) { 1038 /* 1039 * This is similar to retrieval of source addresses in 1040 * ipsock, with the difference that we do not impose 1041 * that a zone ID be given for link-local addresses. 1042 */ 1043 if ((r = addr_get_inet(ifa, ifa->sa_len, addr_type, 1044 &if_addr, TRUE /*kame*/, NULL /*port*/)) != OK) 1045 return r; 1046 1047 if ((ifdev2 = ifaddr_map_by_addr(&if_addr)) == NULL) 1048 return EADDRNOTAVAIL; 1049 1050 if (ifdev != NULL && ifdev != ifdev2) 1051 return EINVAL; 1052 else 1053 ifdev = ifdev2; 1054 } 1055 1056 /* 1057 * If the destination address has a zone, then it must not 1058 * conflict with the interface, if one was given. If not, we 1059 * may use it to decide the interface to use for the route. 
1060 */ 1061 if (IP_IS_V6(&dst_addr) && 1062 ip6_addr_has_zone(ip_2_ip6(&dst_addr))) { 1063 if (ifdev == NULL) { 1064 zone = ip6_addr_zone(ip_2_ip6(&dst_addr)); 1065 1066 ifdev = ifdev_get_by_index(zone); 1067 } else { 1068 if (!ip6_addr_test_zone(ip_2_ip6(&dst_addr), 1069 ifdev_get_netif(ifdev))) 1070 return EADDRNOTAVAIL; 1071 } 1072 } 1073 } 1074 1075 /* 1076 * For now, no initializers are supported by any of the sub-processing 1077 * routines, so outright reject requests that set any initializers. 1078 * Most importantly, we do not support per-route MTU settings (RTV_MTU) 1079 * because lwIP would not use them, and we do not support non-zero 1080 * expiry (RTV_EXPIRE) because for IPv4/IPv6 routes it is not a widely 1081 * used feature and for ARP/NDP we would have to change lwIP. 1082 * dhcpcd(8) does supply RTV_MTU, we have to ignore that option rather 1083 * than reject it, unfortunately. arp(8) always sets RTV_EXPIRE, so we 1084 * reject only non-zero expiry there. 1085 */ 1086 if ((inits & ~(RTV_EXPIRE | RTV_MTU)) != 0 || 1087 ((inits & RTV_EXPIRE) != 0 && rmx->rmx_expire != 0)) 1088 return ENOSYS; 1089 1090 /* 1091 * From here on, the processing differs for ARP, NDP, and IP routes. 1092 * As of writing, our userland is from NetBSD 7, which puts link-local 1093 * route entries in its main route tables. This means we would have to 1094 * search for existing routes before we can determine whether, say, a 1095 * RTM_GET request is for an IP or an ARP route entry. As of NetBSD 8, 1096 * the link-local administration is separated, and all requests use the 1097 * RTF_LLDATA flag to indicate that they are for ARP/NDP routes rather 1098 * than IP routes. Since that change makes things much cleaner for us, 1099 * we borrow from the future, patching arp(8) and ndp(8) to add the 1100 * RTF_LLDATA flag now, so that we can implement a clean split here. 
1101 */ 1102 if (!(flags & RTF_LLDATA)) 1103 return route_process_inet(type, &dst_addr, mask, gateway, 1104 ifdev, flags, rtr); 1105 else 1106 return lldata_process(type, &dst_addr, gateway, ifdev, flags, 1107 rtr); 1108 } 1109 1110 /* 1111 * Return the routing flags (RTF_) for the given routing entry. Strip out any 1112 * internal flags. 1113 */ 1114 unsigned int 1115 route_get_flags(const struct route_entry * route) 1116 { 1117 1118 return route->re_flags & ~RTF_IPV6; 1119 } 1120 1121 /* 1122 * Return TRUE if the given routing entry is for the IPv6 address family, or 1123 * FALSE if it is for IPv4. 1124 */ 1125 int 1126 route_is_ipv6(const struct route_entry * route) 1127 { 1128 1129 return !!(route->re_flags & RTF_IPV6); 1130 } 1131 1132 /* 1133 * Return the interface associated with the given routing entry. The resulting 1134 * interface is never NULL. 1135 */ 1136 struct ifdev * 1137 route_get_ifdev(const struct route_entry * route) 1138 { 1139 1140 return route->re_ifdev; 1141 } 1142 1143 /* 1144 * Convert the given raw routing address pointed to by 'rtaddr' into a 1145 * lwIP-style IP address 'ipaddr' of type 'type', which must by IPADDR_TYPE_V4 1146 * or IPADDR_TYPE_V6. 1147 */ 1148 static void 1149 route_get_addr(ip_addr_t * ipaddr, const uint8_t * rtaddr, uint8_t type) 1150 { 1151 ip6_addr_t *ip6addr; 1152 uint32_t val, zone; 1153 1154 /* 1155 * Convert the routing address to a lwIP-type IP address. Take out the 1156 * KAME-style embedded zone, if needed. 
1157 */ 1158 memset(ipaddr, 0, sizeof(*ipaddr)); 1159 IP_SET_TYPE(ipaddr, type); 1160 1161 switch (type) { 1162 case IPADDR_TYPE_V4: 1163 memcpy(&val, rtaddr, sizeof(val)); 1164 1165 ip_addr_set_ip4_u32(ipaddr, val); 1166 1167 break; 1168 1169 case IPADDR_TYPE_V6: 1170 ip6addr = ip_2_ip6(ipaddr); 1171 1172 memcpy(ip6addr->addr, rtaddr, sizeof(ip6addr->addr)); 1173 1174 if (ip6_addr_has_scope(ip6addr, IP6_UNKNOWN)) { 1175 zone = ntohl(ip6addr->addr[0]) & 0x0000ffffU; 1176 1177 ip6addr->addr[0] &= PP_HTONL(0xffff0000U); 1178 1179 ip6_addr_set_zone(ip6addr, zone); 1180 } 1181 1182 break; 1183 1184 default: 1185 panic("unknown IP address type: %u", type); 1186 } 1187 } 1188 1189 /* 1190 * Obtain information about an IPv4 or IPv6 routing entry, by filling 'addr', 1191 * 'mask', 'gateway', and optionally (if not NULL) 'ifp' and 'ifa' with 1192 * sockaddr-type data for each of those fields. Also store the associated 1193 * interface in 'ifdevp', the routing entry's flags in 'flags', and the route's 1194 * usage count in 'use'. 1195 */ 1196 void 1197 route_get(const struct route_entry * route, union sockaddr_any * addr, 1198 union sockaddr_any * mask, union sockaddr_any * gateway, 1199 union sockaddr_any * ifp, union sockaddr_any * ifa, 1200 struct ifdev ** ifdevp, unsigned int * flags, unsigned int * use) 1201 { 1202 const ip_addr_t *src_addr; 1203 ip_addr_t dst_addr, gw_addr; 1204 struct ifdev *ifdev; 1205 socklen_t addr_len; 1206 uint8_t type; 1207 1208 type = (route->re_flags & RTF_IPV6) ? IPADDR_TYPE_V6 : IPADDR_TYPE_V4; 1209 1210 /* Get the destination address. */ 1211 route_get_addr(&dst_addr, route->re_addr, type); 1212 1213 addr_len = sizeof(*addr); 1214 1215 addr_put_inet(&addr->sa, &addr_len, &dst_addr, TRUE /*kame*/, 1216 0 /*port*/); 1217 1218 /* Get the network mask, if applicable. 
*/ 1219 if (!(route->re_flags & RTF_HOST)) { 1220 addr_len = sizeof(*mask); 1221 1222 addr_put_netmask(&mask->sa, &addr_len, type, 1223 rttree_get_prefix(&route->re_entry)); 1224 } else 1225 mask->sa.sa_len = 0; 1226 1227 /* Get the gateway, which may be an IP address or a local link. */ 1228 addr_len = sizeof(*gateway); 1229 1230 ifdev = route->re_ifdev; 1231 1232 if (route->re_flags & RTF_GATEWAY) { 1233 if (type == IPADDR_TYPE_V4) 1234 ip_addr_copy_from_ip4(gw_addr, route->re_gw4); 1235 else 1236 ip_addr_copy_from_ip6_packed(gw_addr, route->re_gw6); 1237 1238 addr_put_inet(&gateway->sa, &addr_len, &gw_addr, TRUE /*kame*/, 1239 0 /*port*/); 1240 } else { 1241 addr_put_link(&gateway->sa, &addr_len, ifdev_get_index(ifdev), 1242 ifdev_get_iftype(ifdev), NULL /*name*/, NULL /*hwaddr*/, 1243 0 /*hwaddr_len*/); 1244 } 1245 1246 /* Get the associated interface name. */ 1247 if (ifp != NULL) { 1248 addr_len = sizeof(*ifp); 1249 1250 addr_put_link(&ifp->sa, &addr_len, ifdev_get_index(ifdev), 1251 ifdev_get_iftype(ifdev), ifdev_get_name(ifdev), 1252 NULL /*hwaddr*/, 0 /*hwaddr_len*/); 1253 } 1254 1255 /* Get the associated source address, if we can determine one. */ 1256 if (ifa != NULL) { 1257 src_addr = ifaddr_select(&dst_addr, ifdev, NULL /*ifdevp*/); 1258 1259 if (src_addr != NULL) { 1260 addr_len = sizeof(*ifa); 1261 1262 addr_put_inet(&ifa->sa, &addr_len, src_addr, 1263 TRUE /*kame*/, 0 /*port*/); 1264 } else 1265 ifa->sa.sa_len = 0; 1266 } 1267 1268 /* Get other fields. */ 1269 *flags = route_get_flags(route); /* strip any internal flags */ 1270 *ifdevp = ifdev; 1271 *use = route->re_use; 1272 } 1273 1274 /* 1275 * Enumerate IPv4 routing entries. Return the first IPv4 routing entry if 1276 * 'last' is NULL, or the next routing entry after 'last' if it is not NULL. 1277 * In both cases, the return value may be NULL if there are no more routes. 
1278 */ 1279 struct route_entry * 1280 route_enum_v4(struct route_entry * last) 1281 { 1282 1283 assert(last == NULL || !(last->re_flags & RTF_IPV6)); 1284 1285 return (struct route_entry *)rttree_enum(&route_tree[ROUTE_TREE_V4], 1286 (last != NULL) ? &last->re_entry : NULL); 1287 } 1288 1289 /* 1290 * Enumerate IPv6 routing entries. Return the first IPv6 routing entry if 1291 * 'last' is NULL, or the next routing entry after 'last' if it is not NULL. 1292 * In both cases, the return value may be NULL if there are no more routes. 1293 */ 1294 struct route_entry * 1295 route_enum_v6(struct route_entry * last) 1296 { 1297 1298 assert(last == NULL || (last->re_flags & RTF_IPV6)); 1299 1300 return (struct route_entry *)rttree_enum(&route_tree[ROUTE_TREE_V6], 1301 (last != NULL) ? &last->re_entry : NULL); 1302 } 1303 1304 /* 1305 * lwIP IPv4 routing function. Given an IPv4 destination address, look up and 1306 * return the target interface, or NULL if there is no route to the address. 1307 * 1308 * This is a full replacement of the corresponding lwIP function, which should 1309 * be overridden with weak symbols, using patches against the lwIP source code. 1310 * As such, the lwIP headers should already provide the correct prototype for 1311 * this function. If not, something will have changed in the lwIP 1312 * implementation, and this code must be revised accordingly. 1313 */ 1314 struct netif * 1315 ip4_route(const ip4_addr_t * dst) 1316 { 1317 struct route_entry *route; 1318 struct ifdev *ifdev; 1319 1320 /* 1321 * Look up the route for the destination IPv4 address. If no route is 1322 * found at all, return NULL to the caller. 1323 */ 1324 if ((route = route_lookup_v4(dst)) == NULL) { 1325 route_miss_v4(dst); 1326 1327 return NULL; 1328 } 1329 1330 /* 1331 * For now, we increase the use counter only for actual route lookups, 1332 * and not for gateway lookups or user queries. As of writing, 1333 * route(8) does not print this number anyway.. 
1334 */ 1335 route->re_use++; 1336 1337 /* 1338 * For all packets that are supposed to be rejected or blackholed, use 1339 * a loopback interface, regardless of the interface to which the route 1340 * is associated (even though it will typically be lo0 anyway). The 1341 * reason for this is that on packet output, we perform another route 1342 * route lookup just to check for rejection/blackholing, but for 1343 * efficiency reasons, we limit such checks to loopback interfaces: 1344 * loopback traffic will typically use only one IP address anyway, thus 1345 * limiting route misses from such rejection/blackhole route lookups as 1346 * much as we can. The lookup is implemented in route_output_v4(). We 1347 * divert only if the target interface is not a loopback interface 1348 * already, mainly to allow userland tests to create blackhole routes 1349 * to a specific loopback interface for testing purposes. 1350 * 1351 * It is not correct to return NULL for RTF_REJECT routes here, because 1352 * this could cause e.g. connect() calls to fail immediately, which is 1353 * not how rejection should work. Related: a previous incarnation of 1354 * support for these flags used a dedicated netif to eliminate the 1355 * extra route lookup on regular output altogether, but in the current 1356 * situation, that netif would have to be assigned (IPv4 and IPv6) 1357 * addresses in order not to break e.g. connect() in the same way. 1358 */ 1359 if ((route->re_flags & (RTF_REJECT | RTF_BLACKHOLE)) && 1360 !ifdev_is_loopback(route->re_ifdev)) 1361 ifdev = ifdev_get_loopback(); 1362 else 1363 ifdev = route->re_ifdev; 1364 1365 return ifdev_get_netif(ifdev); 1366 } 1367 1368 /* 1369 * lwIP IPv4 routing hook. Since this hook is called only from lwIP's own 1370 * ip4_route() implementation, this hook must never fire. If it does, either 1371 * something is wrong with overriding ip4_route(), or lwIP added other places 1372 * from which this hook is called. 
Both cases are highly problematic and must 1373 * be resolved somehow, which is why we simply call panic() here. 1374 */ 1375 struct netif * 1376 lwip_hook_ip4_route(const ip4_addr_t * dst) 1377 { 1378 1379 panic("IPv4 routing hook called - this should not happen!"); 1380 } 1381 1382 /* 1383 * lwIP IPv4 ARP gateway hook. 1384 */ 1385 const ip4_addr_t * 1386 lwip_hook_etharp_get_gw(struct netif * netif, const ip4_addr_t * ip4addr) 1387 { 1388 static ip4_addr_t gw_addr; /* may be returned to the caller */ 1389 struct route_entry *route; 1390 1391 /* Look up the route for the destination IP address. */ 1392 if ((route = route_lookup_v4(ip4addr)) == NULL) 1393 return NULL; 1394 1395 /* 1396 * This case could only ever trigger as a result of lwIP taking its own 1397 * routing decisions instead of calling the IPv4 routing hook. While 1398 * not impossible, such cases should be extremely rare. We cannot 1399 * provide a meaningful gateway address in this case either, though. 1400 */ 1401 if (route->re_ifdev != netif_get_ifdev(netif)) { 1402 printf("LWIP: unexpected interface for gateway lookup\n"); 1403 1404 return NULL; 1405 } 1406 1407 /* 1408 * If this route has a gateway, return the IP address of the gateway. 1409 * Otherwise, the route is for a local network, and we would typically 1410 * not get here because lwIP performs the local-network check itself. 1411 * It is possible that the local network consists of more than one IP 1412 * range, and the user has configured a route for the other range. In 1413 * that case, return the IP address of the actual destination. 1414 * 1415 * We store a packed version of the IPv4 address, so reconstruct the 1416 * unpacked version to a static variable first - for consistency with 1417 * the IPv6 code. 1418 */ 1419 if (route->re_flags & RTF_GATEWAY) { 1420 ip4_addr_copy(gw_addr, route->re_gw4); 1421 1422 return &gw_addr; 1423 } else 1424 return ip4addr; 1425 } 1426 1427 /* 1428 * lwIP IPv6 routing function. 
Given an IPv6 source and destination address, 1429 * look up and return the target interface, or NULL if there is no route to the 1430 * address. Our routing algorithm is destination-based, meaning that the 1431 * source address must be considered only to resolve zone ambiguity. 1432 * 1433 * This is a full replacement of the corresponding lwIP function, which should 1434 * be overridden with weak symbols, using patches against the lwIP source code. 1435 * As such, the lwIP headers should already provide the correct prototype for 1436 * this function. If not, something will have changed in the lwIP 1437 * implementation, and this code must be revised accordingly. 1438 */ 1439 struct netif * 1440 ip6_route(const ip6_addr_t * src, const ip6_addr_t * dst) 1441 { 1442 struct route_entry *route; 1443 struct ifdev *ifdev; 1444 ip6_addr_t dst_addr; 1445 uint32_t zone; 1446 1447 assert(src != NULL); 1448 assert(dst != NULL); 1449 1450 /* 1451 * If the destination address is scoped but has no zone, use the source 1452 * address to determine a zone, which we then set on the destination 1453 * address to find the route, if successful. Obviously, the interface 1454 * is not going to be different from the zone, but we do need to check 1455 * other aspects of the route (e.g., one might want to null-route all 1456 * multicast traffic). In the case that no source address is given at 1457 * all, first see if the destination address happens to be a locally 1458 * assigned address. In theory this could yield multiple matches, so 1459 * pick the first one. If not even that helps, we have absolutely 1460 * nothing we can use to refine route selection. We could pick an 1461 * arbitrary interface in that case, but we currently don't. 
1462 */ 1463 zone = IP6_NO_ZONE; 1464 1465 if (ip6_addr_lacks_zone(dst, IP6_UNKNOWN)) { 1466 if (ip6_addr_has_zone(src)) 1467 zone = ip6_addr_zone(src); 1468 else if (!ip6_addr_isany(src)) { 1469 if ((ifdev = ifaddr_v6_map_by_addr(src)) == NULL) 1470 return NULL; /* should never happen */ 1471 zone = ifdev_get_index(ifdev); 1472 } else { 1473 if ((ifdev = ifaddr_v6_map_by_addr(dst)) != NULL) 1474 zone = ifdev_get_index(ifdev); 1475 else 1476 return NULL; /* TODO: try harder */ 1477 } 1478 1479 if (zone != IP6_NO_ZONE) { 1480 dst_addr = *dst; 1481 1482 ip6_addr_set_zone(&dst_addr, zone); 1483 1484 dst = &dst_addr; 1485 } 1486 } 1487 1488 route = route_lookup_v6(dst); 1489 1490 /* 1491 * Look up the route for the destination IPv6 address. If no route is 1492 * found at all, return NULL to the caller. 1493 */ 1494 if (route == NULL) { 1495 /* 1496 * Since we rely on userland to create routes for on-link 1497 * prefixes and default routers, we do not have to call lwIP's 1498 * nd6_find_route() here. 1499 */ 1500 1501 /* Generate an RTM_MISS message. */ 1502 route_miss_v6(dst); 1503 1504 return NULL; 1505 } 1506 1507 /* 1508 * We have found a route based on the destination address. If we did 1509 * not pick the destination address zone based on the source address, 1510 * we should now check for source address zone violations. Note that 1511 * if even the destination address zone violates its target interface, 1512 * this case will be caught by route_lookup_v6(). 1513 */ 1514 if (zone == IP6_NO_ZONE && 1515 ifaddr_is_zone_mismatch(src, route->re_ifdev)) 1516 return NULL; 1517 1518 route->re_use++; 1519 1520 /* 1521 * See ip4_route() for an explanation of the use of loopback here. For 1522 * the IPv6 case, the matching logic is in route_output_v6(). 
1523 */ 1524 if ((route->re_flags & (RTF_REJECT | RTF_BLACKHOLE)) && 1525 !ifdev_is_loopback(route->re_ifdev)) 1526 ifdev = ifdev_get_loopback(); 1527 else 1528 ifdev = route->re_ifdev; 1529 1530 /* 1531 * If the selected interface would cause the destination address to 1532 * leave its zone, fail route selection altogether. This case may 1533 * trigger especially for reject routes, for which the interface change 1534 * to loopback may introduce a zone violation. 1535 */ 1536 if (ip6_addr_has_zone(dst) && 1537 !ip6_addr_test_zone(dst, ifdev_get_netif(ifdev))) 1538 return NULL; 1539 1540 return ifdev_get_netif(ifdev); 1541 } 1542 1543 /* 1544 * lwIP IPv6 (source) routing hook. Since this hook is called only from lwIP's 1545 * own ip6_route() implementation, this hook must never fire. If it does, 1546 * either something is wrong with overriding ip6_route(), or lwIP added other 1547 * places from which this hook is called. Both cases are highly problematic 1548 * and must be resolved somehow, which is why we simply call panic() here. 1549 */ 1550 struct netif * 1551 lwip_hook_ip6_route(const ip6_addr_t * src, const ip6_addr_t * dst) 1552 { 1553 1554 panic("IPv6 routing hook called - this should not happen!"); 1555 } 1556 1557 /* 1558 * lwIP IPv6 ND6 gateway hook. 1559 */ 1560 const ip6_addr_t * 1561 lwip_hook_nd6_get_gw(struct netif * netif, const ip6_addr_t * ip6addr) 1562 { 1563 static ip6_addr_t gw_addr; /* may be returned to the caller */ 1564 struct route_entry *route; 1565 struct ifdev *ifdev; 1566 1567 ifdev = netif_get_ifdev(netif); 1568 assert(ifdev != NULL); 1569 1570 /* Look up the route for the destination IP address. */ 1571 if ((route = route_lookup_v6(ip6addr)) == NULL) 1572 return NULL; 1573 1574 /* As for IPv4. */ 1575 if (route->re_ifdev != ifdev) { 1576 printf("LWIP: unexpected interface for gateway lookup\n"); 1577 1578 return NULL; 1579 } 1580 1581 /* 1582 * We save memory by storing a packed (zoneless) version of the IPv6 1583 * gateway address. 
That means we cannot return a pointer to it here. 1584 * Instead, we have to resort to expanding the address into a static 1585 * variable. The caller will immediately make a copy anyway, though. 1586 */ 1587 if (route->re_flags & RTF_GATEWAY) { 1588 ip6_addr_copy_from_packed(gw_addr, route->re_gw6); 1589 ip6_addr_assign_zone(&gw_addr, IP6_UNKNOWN, netif); 1590 1591 return &gw_addr; 1592 } else 1593 return ip6addr; 1594 } 1595 1596 /* 1597 * Check whether a packet is allowed to be sent to the given destination IPv4 1598 * address 'ipaddr' on the interface 'ifdev', according to route information. 1599 * Return TRUE if the packet should be sent. Return FALSE if the packet should 1600 * be rejected or discarded, with 'err' set to the error to return to lwIP. 1601 */ 1602 int 1603 route_output_v4(struct ifdev * ifdev, const ip4_addr_t * ipaddr, err_t * err) 1604 { 1605 const struct route_entry *route; 1606 1607 /* See if we should reject/blackhole packets to this destination. */ 1608 if (ifdev_is_loopback(ifdev) && 1609 (route = route_lookup_v4(ipaddr)) != NULL && 1610 (route->re_flags & (RTF_REJECT | RTF_BLACKHOLE))) { 1611 if (route->re_flags & RTF_REJECT) 1612 *err = ERR_RTE; 1613 else 1614 *err = ERR_OK; 1615 1616 return FALSE; 1617 } 1618 1619 return TRUE; 1620 } 1621 1622 /* 1623 * Check whether a packet is allowed to be sent to the given destination IPv6 1624 * address 'ipaddr' on the interface 'ifdev', according to route information. 1625 * Return TRUE if the packet should be sent. Return FALSE if the packet should 1626 * be rejected or discarded, with 'err' set to the error to return to lwIP. 1627 */ 1628 int 1629 route_output_v6(struct ifdev * ifdev, const ip6_addr_t * ipaddr, err_t * err) 1630 { 1631 const struct route_entry *route; 1632 1633 /* Do one more zone violation test, just in case. It's cheap. 
*/ 1634 if (ip6_addr_has_zone(ipaddr) && 1635 !ip6_addr_test_zone(ipaddr, ifdev_get_netif(ifdev))) { 1636 *err = ERR_RTE; 1637 1638 return FALSE; 1639 } 1640 1641 /* See if we should reject/blackhole packets to this destination. */ 1642 if (ifdev_is_loopback(ifdev) && 1643 (route = route_lookup_v6(ipaddr)) != NULL && 1644 (route->re_flags & (RTF_REJECT | RTF_BLACKHOLE))) { 1645 if (route->re_flags & RTF_REJECT) 1646 *err = ERR_RTE; 1647 else 1648 *err = ERR_OK; 1649 1650 return FALSE; 1651 } 1652 1653 return TRUE; 1654 } 1655