/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 1990 Mentat Inc.
 */

/*
 * This file contains routines that manipulate Internet Routing Entries (IREs).
 */
290Sstevel@tonic-gate #include <sys/types.h>
300Sstevel@tonic-gate #include <sys/stream.h>
310Sstevel@tonic-gate #include <sys/stropts.h>
320Sstevel@tonic-gate #include <sys/ddi.h>
330Sstevel@tonic-gate #include <sys/cmn_err.h>
340Sstevel@tonic-gate
350Sstevel@tonic-gate #include <sys/systm.h>
360Sstevel@tonic-gate #include <sys/param.h>
370Sstevel@tonic-gate #include <sys/socket.h>
380Sstevel@tonic-gate #include <net/if.h>
390Sstevel@tonic-gate #include <net/route.h>
400Sstevel@tonic-gate #include <netinet/in.h>
410Sstevel@tonic-gate #include <net/if_dl.h>
420Sstevel@tonic-gate #include <netinet/ip6.h>
430Sstevel@tonic-gate #include <netinet/icmp6.h>
440Sstevel@tonic-gate
450Sstevel@tonic-gate #include <inet/common.h>
460Sstevel@tonic-gate #include <inet/mi.h>
470Sstevel@tonic-gate #include <inet/ip.h>
480Sstevel@tonic-gate #include <inet/ip6.h>
490Sstevel@tonic-gate #include <inet/ip_ndp.h>
500Sstevel@tonic-gate #include <inet/ip_if.h>
510Sstevel@tonic-gate #include <inet/ip_ire.h>
520Sstevel@tonic-gate #include <inet/ipclassifier.h>
530Sstevel@tonic-gate #include <inet/nd.h>
5412016SGirish.Moodalbail@Sun.COM #include <inet/tunables.h>
550Sstevel@tonic-gate #include <sys/kmem.h>
560Sstevel@tonic-gate #include <sys/zone.h>
570Sstevel@tonic-gate
581676Sjpk #include <sys/tsol/label.h>
591676Sjpk #include <sys/tsol/tnet.h>
601676Sjpk
6111042SErik.Nordmark@Sun.COM #define IS_DEFAULT_ROUTE_V6(ire) \
6211042SErik.Nordmark@Sun.COM (((ire)->ire_type & IRE_DEFAULT) || \
6311042SErik.Nordmark@Sun.COM (((ire)->ire_type & IRE_INTERFACE) && \
6411042SErik.Nordmark@Sun.COM (IN6_IS_ADDR_UNSPECIFIED(&(ire)->ire_addr_v6))))
6511042SErik.Nordmark@Sun.COM
660Sstevel@tonic-gate static ire_t ire_null;
670Sstevel@tonic-gate
6811042SErik.Nordmark@Sun.COM static ire_t *
6911042SErik.Nordmark@Sun.COM ire_ftable_lookup_impl_v6(const in6_addr_t *addr, const in6_addr_t *mask,
7011042SErik.Nordmark@Sun.COM const in6_addr_t *gateway, int type, const ill_t *ill,
7111042SErik.Nordmark@Sun.COM zoneid_t zoneid, const ts_label_t *tsl, int flags,
7211042SErik.Nordmark@Sun.COM ip_stack_t *ipst);
730Sstevel@tonic-gate
740Sstevel@tonic-gate /*
750Sstevel@tonic-gate * Initialize the ire that is specific to IPv6 part and call
760Sstevel@tonic-gate * ire_init_common to finish it.
7711042SErik.Nordmark@Sun.COM * Returns zero or errno.
780Sstevel@tonic-gate */
7911042SErik.Nordmark@Sun.COM int
ire_init_v6(ire_t * ire,const in6_addr_t * v6addr,const in6_addr_t * v6mask,const in6_addr_t * v6gateway,ushort_t type,ill_t * ill,zoneid_t zoneid,uint_t flags,tsol_gc_t * gc,ip_stack_t * ipst)804714Ssowmini ire_init_v6(ire_t *ire, const in6_addr_t *v6addr, const in6_addr_t *v6mask,
8111042SErik.Nordmark@Sun.COM const in6_addr_t *v6gateway, ushort_t type, ill_t *ill,
8211042SErik.Nordmark@Sun.COM zoneid_t zoneid, uint_t flags, tsol_gc_t *gc, ip_stack_t *ipst)
830Sstevel@tonic-gate {
8411042SErik.Nordmark@Sun.COM int error;
852535Ssangeeta
861676Sjpk /*
8711042SErik.Nordmark@Sun.COM * Reject IRE security attmakeribute creation/initialization
881676Sjpk * if system is not running in Trusted mode.
891676Sjpk */
9011042SErik.Nordmark@Sun.COM if (gc != NULL && !is_system_labeled())
9111042SErik.Nordmark@Sun.COM return (EINVAL);
920Sstevel@tonic-gate
933448Sdh155122 BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_alloced);
9411042SErik.Nordmark@Sun.COM if (v6addr != NULL)
9511042SErik.Nordmark@Sun.COM ire->ire_addr_v6 = *v6addr;
960Sstevel@tonic-gate if (v6gateway != NULL)
970Sstevel@tonic-gate ire->ire_gateway_addr_v6 = *v6gateway;
980Sstevel@tonic-gate
9911042SErik.Nordmark@Sun.COM /* Make sure we don't have stray values in some fields */
10011042SErik.Nordmark@Sun.COM switch (type) {
10111042SErik.Nordmark@Sun.COM case IRE_LOOPBACK:
10211042SErik.Nordmark@Sun.COM case IRE_HOST:
10311042SErik.Nordmark@Sun.COM case IRE_LOCAL:
10411042SErik.Nordmark@Sun.COM case IRE_IF_CLONE:
10511042SErik.Nordmark@Sun.COM ire->ire_mask_v6 = ipv6_all_ones;
10611042SErik.Nordmark@Sun.COM ire->ire_masklen = IPV6_ABITS;
10711042SErik.Nordmark@Sun.COM break;
10811042SErik.Nordmark@Sun.COM case IRE_PREFIX:
10911042SErik.Nordmark@Sun.COM case IRE_DEFAULT:
11011042SErik.Nordmark@Sun.COM case IRE_IF_RESOLVER:
11111042SErik.Nordmark@Sun.COM case IRE_IF_NORESOLVER:
11211042SErik.Nordmark@Sun.COM if (v6mask != NULL) {
11311042SErik.Nordmark@Sun.COM ire->ire_mask_v6 = *v6mask;
11411042SErik.Nordmark@Sun.COM ire->ire_masklen =
11511042SErik.Nordmark@Sun.COM ip_mask_to_plen_v6(&ire->ire_mask_v6);
11611042SErik.Nordmark@Sun.COM }
11711042SErik.Nordmark@Sun.COM break;
11811042SErik.Nordmark@Sun.COM case IRE_MULTICAST:
11911042SErik.Nordmark@Sun.COM case IRE_NOROUTE:
12011042SErik.Nordmark@Sun.COM ASSERT(v6mask == NULL);
12111042SErik.Nordmark@Sun.COM break;
12211042SErik.Nordmark@Sun.COM default:
12311042SErik.Nordmark@Sun.COM ASSERT(0);
12411042SErik.Nordmark@Sun.COM return (EINVAL);
1250Sstevel@tonic-gate }
1260Sstevel@tonic-gate
12711042SErik.Nordmark@Sun.COM error = ire_init_common(ire, type, ill, zoneid, flags, IPV6_VERSION,
12811042SErik.Nordmark@Sun.COM gc, ipst);
12911042SErik.Nordmark@Sun.COM if (error != NULL)
13011042SErik.Nordmark@Sun.COM return (error);
1310Sstevel@tonic-gate
13211042SErik.Nordmark@Sun.COM /* Determine which function pointers to use */
13311042SErik.Nordmark@Sun.COM ire->ire_postfragfn = ip_xmit; /* Common case */
1340Sstevel@tonic-gate
13511042SErik.Nordmark@Sun.COM switch (ire->ire_type) {
13611042SErik.Nordmark@Sun.COM case IRE_LOCAL:
13711042SErik.Nordmark@Sun.COM ire->ire_sendfn = ire_send_local_v6;
13811042SErik.Nordmark@Sun.COM ire->ire_recvfn = ire_recv_local_v6;
13911042SErik.Nordmark@Sun.COM ASSERT(ire->ire_ill != NULL);
14011076SCathy.Zhou@Sun.COM if (ire->ire_ill->ill_flags & ILLF_NOACCEPT)
14111042SErik.Nordmark@Sun.COM ire->ire_recvfn = ire_recv_noaccept_v6;
14211042SErik.Nordmark@Sun.COM break;
14311042SErik.Nordmark@Sun.COM case IRE_LOOPBACK:
14411042SErik.Nordmark@Sun.COM ire->ire_sendfn = ire_send_local_v6;
14511042SErik.Nordmark@Sun.COM ire->ire_recvfn = ire_recv_loopback_v6;
14611042SErik.Nordmark@Sun.COM break;
14711042SErik.Nordmark@Sun.COM case IRE_MULTICAST:
14811042SErik.Nordmark@Sun.COM ire->ire_postfragfn = ip_postfrag_loopcheck;
14911042SErik.Nordmark@Sun.COM ire->ire_sendfn = ire_send_multicast_v6;
15011042SErik.Nordmark@Sun.COM ire->ire_recvfn = ire_recv_multicast_v6;
15111042SErik.Nordmark@Sun.COM break;
15211042SErik.Nordmark@Sun.COM default:
15311042SErik.Nordmark@Sun.COM /*
15411042SErik.Nordmark@Sun.COM * For IRE_IF_ALL and IRE_OFFLINK we forward received
15511042SErik.Nordmark@Sun.COM * packets by default.
15611042SErik.Nordmark@Sun.COM */
15711042SErik.Nordmark@Sun.COM ire->ire_sendfn = ire_send_wire_v6;
15811042SErik.Nordmark@Sun.COM ire->ire_recvfn = ire_recv_forward_v6;
15911042SErik.Nordmark@Sun.COM break;
1600Sstevel@tonic-gate }
16111042SErik.Nordmark@Sun.COM if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
16211042SErik.Nordmark@Sun.COM ire->ire_sendfn = ire_send_noroute_v6;
16311042SErik.Nordmark@Sun.COM ire->ire_recvfn = ire_recv_noroute_v6;
16411042SErik.Nordmark@Sun.COM } else if (ire->ire_flags & RTF_MULTIRT) {
16511042SErik.Nordmark@Sun.COM ire->ire_postfragfn = ip_postfrag_multirt_v6;
16611042SErik.Nordmark@Sun.COM ire->ire_sendfn = ire_send_multirt_v6;
16711042SErik.Nordmark@Sun.COM ire->ire_recvfn = ire_recv_multirt_v6;
1680Sstevel@tonic-gate }
16911042SErik.Nordmark@Sun.COM ire->ire_nce_capable = ire_determine_nce_capable(ire);
17011042SErik.Nordmark@Sun.COM return (0);
1710Sstevel@tonic-gate }
1720Sstevel@tonic-gate
1730Sstevel@tonic-gate /*
1740Sstevel@tonic-gate * ire_create_v6 is called to allocate and initialize a new IRE.
1750Sstevel@tonic-gate *
1760Sstevel@tonic-gate * NOTE : This is called as writer sometimes though not required
1770Sstevel@tonic-gate * by this function.
1780Sstevel@tonic-gate */
1794714Ssowmini /* ARGSUSED */
1800Sstevel@tonic-gate ire_t *
ire_create_v6(const in6_addr_t * v6addr,const in6_addr_t * v6mask,const in6_addr_t * v6gateway,ushort_t type,ill_t * ill,zoneid_t zoneid,uint_t flags,tsol_gc_t * gc,ip_stack_t * ipst)1810Sstevel@tonic-gate ire_create_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask,
18211042SErik.Nordmark@Sun.COM const in6_addr_t *v6gateway, ushort_t type, ill_t *ill, zoneid_t zoneid,
18311042SErik.Nordmark@Sun.COM uint_t flags, tsol_gc_t *gc, ip_stack_t *ipst)
1840Sstevel@tonic-gate {
1850Sstevel@tonic-gate ire_t *ire;
18611042SErik.Nordmark@Sun.COM int error;
1870Sstevel@tonic-gate
1880Sstevel@tonic-gate ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr));
1890Sstevel@tonic-gate
1900Sstevel@tonic-gate ire = kmem_cache_alloc(ire_cache, KM_NOSLEEP);
1910Sstevel@tonic-gate if (ire == NULL) {
19211042SErik.Nordmark@Sun.COM DTRACE_PROBE(kmem__cache__alloc);
1930Sstevel@tonic-gate return (NULL);
1940Sstevel@tonic-gate }
1950Sstevel@tonic-gate *ire = ire_null;
1960Sstevel@tonic-gate
19711042SErik.Nordmark@Sun.COM error = ire_init_v6(ire, v6addr, v6mask, v6gateway,
19811042SErik.Nordmark@Sun.COM type, ill, zoneid, flags, gc, ipst);
1990Sstevel@tonic-gate
20011042SErik.Nordmark@Sun.COM if (error != 0) {
20111042SErik.Nordmark@Sun.COM DTRACE_PROBE2(ire__init__v6, ire_t *, ire, int, error);
2020Sstevel@tonic-gate kmem_cache_free(ire_cache, ire);
2030Sstevel@tonic-gate return (NULL);
2040Sstevel@tonic-gate }
2050Sstevel@tonic-gate return (ire);
2060Sstevel@tonic-gate }
2070Sstevel@tonic-gate
2080Sstevel@tonic-gate /*
20911042SErik.Nordmark@Sun.COM * Find the ill matching a multicast group.
2100Sstevel@tonic-gate * Allows different routes for multicast addresses
2110Sstevel@tonic-gate * in the unicast routing table (akin to FF::0/8 but could be more specific)
2120Sstevel@tonic-gate * which point at different interfaces. This is used when IPV6_MULTICAST_IF
2130Sstevel@tonic-gate * isn't specified (when sending) and when IPV6_JOIN_GROUP doesn't
2140Sstevel@tonic-gate * specify the interface to join on.
2150Sstevel@tonic-gate *
21611042SErik.Nordmark@Sun.COM * Supports link-local addresses by using ire_route_recursive which follows
21711042SErik.Nordmark@Sun.COM * the ill when recursing.
21811042SErik.Nordmark@Sun.COM *
21911042SErik.Nordmark@Sun.COM * To handle CGTP, since we don't have a separate IRE_MULTICAST for each group
22011042SErik.Nordmark@Sun.COM * and the MULTIRT property can be different for different groups, we
22111042SErik.Nordmark@Sun.COM * extract RTF_MULTIRT from the special unicast route added for a group
22211042SErik.Nordmark@Sun.COM * with CGTP and pass that back in the multirtp argument.
22311042SErik.Nordmark@Sun.COM * This is used in ip_set_destination etc to set ixa_postfragfn for multicast.
22411042SErik.Nordmark@Sun.COM * We have a setsrcp argument for the same reason.
2250Sstevel@tonic-gate */
22611042SErik.Nordmark@Sun.COM ill_t *
ire_lookup_multi_ill_v6(const in6_addr_t * group,zoneid_t zoneid,ip_stack_t * ipst,boolean_t * multirtp,in6_addr_t * setsrcp)22711042SErik.Nordmark@Sun.COM ire_lookup_multi_ill_v6(const in6_addr_t *group, zoneid_t zoneid,
22811042SErik.Nordmark@Sun.COM ip_stack_t *ipst, boolean_t *multirtp, in6_addr_t *setsrcp)
2290Sstevel@tonic-gate {
2300Sstevel@tonic-gate ire_t *ire;
23111042SErik.Nordmark@Sun.COM ill_t *ill;
2320Sstevel@tonic-gate
23311042SErik.Nordmark@Sun.COM ire = ire_route_recursive_v6(group, 0, NULL, zoneid, NULL,
23411457SErik.Nordmark@Sun.COM MATCH_IRE_DSTONLY, IRR_NONE, 0, ipst, setsrcp, NULL, NULL);
23511042SErik.Nordmark@Sun.COM ASSERT(ire != NULL);
2360Sstevel@tonic-gate
23711042SErik.Nordmark@Sun.COM if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
2380Sstevel@tonic-gate ire_refrele(ire);
2390Sstevel@tonic-gate return (NULL);
2400Sstevel@tonic-gate }
2410Sstevel@tonic-gate
24211042SErik.Nordmark@Sun.COM if (multirtp != NULL)
24311042SErik.Nordmark@Sun.COM *multirtp = (ire->ire_flags & RTF_MULTIRT) != 0;
2440Sstevel@tonic-gate
24511042SErik.Nordmark@Sun.COM ill = ire_nexthop_ill(ire);
24611042SErik.Nordmark@Sun.COM ire_refrele(ire);
24711042SErik.Nordmark@Sun.COM return (ill);
2480Sstevel@tonic-gate }
2490Sstevel@tonic-gate
2500Sstevel@tonic-gate /*
2510Sstevel@tonic-gate * This function takes a mask and returns number of bits set in the
2520Sstevel@tonic-gate * mask (the represented prefix length). Assumes a contiguous mask.
2530Sstevel@tonic-gate */
2540Sstevel@tonic-gate int
ip_mask_to_plen_v6(const in6_addr_t * v6mask)2550Sstevel@tonic-gate ip_mask_to_plen_v6(const in6_addr_t *v6mask)
2560Sstevel@tonic-gate {
2570Sstevel@tonic-gate int bits;
2580Sstevel@tonic-gate int plen = IPV6_ABITS;
2590Sstevel@tonic-gate int i;
2600Sstevel@tonic-gate
2610Sstevel@tonic-gate for (i = 3; i >= 0; i--) {
2620Sstevel@tonic-gate if (v6mask->s6_addr32[i] == 0) {
2630Sstevel@tonic-gate plen -= 32;
2640Sstevel@tonic-gate continue;
2650Sstevel@tonic-gate }
2660Sstevel@tonic-gate bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1;
2670Sstevel@tonic-gate if (bits == 0)
2680Sstevel@tonic-gate break;
2690Sstevel@tonic-gate plen -= bits;
2700Sstevel@tonic-gate }
2710Sstevel@tonic-gate
2720Sstevel@tonic-gate return (plen);
2730Sstevel@tonic-gate }
2740Sstevel@tonic-gate
2750Sstevel@tonic-gate /*
2760Sstevel@tonic-gate * Convert a prefix length to the mask for that prefix.
2770Sstevel@tonic-gate * Returns the argument bitmask.
2780Sstevel@tonic-gate */
2790Sstevel@tonic-gate in6_addr_t *
ip_plen_to_mask_v6(uint_t plen,in6_addr_t * bitmask)2800Sstevel@tonic-gate ip_plen_to_mask_v6(uint_t plen, in6_addr_t *bitmask)
2810Sstevel@tonic-gate {
2820Sstevel@tonic-gate uint32_t *ptr;
2830Sstevel@tonic-gate
2840Sstevel@tonic-gate if (plen < 0 || plen > IPV6_ABITS)
2850Sstevel@tonic-gate return (NULL);
2860Sstevel@tonic-gate *bitmask = ipv6_all_zeros;
28711042SErik.Nordmark@Sun.COM if (plen == 0)
28811042SErik.Nordmark@Sun.COM return (bitmask);
2890Sstevel@tonic-gate
2900Sstevel@tonic-gate ptr = (uint32_t *)bitmask;
2910Sstevel@tonic-gate while (plen > 32) {
2920Sstevel@tonic-gate *ptr++ = 0xffffffffU;
2930Sstevel@tonic-gate plen -= 32;
2940Sstevel@tonic-gate }
2950Sstevel@tonic-gate *ptr = htonl(0xffffffffU << (32 - plen));
2960Sstevel@tonic-gate return (bitmask);
2970Sstevel@tonic-gate }
2980Sstevel@tonic-gate
2990Sstevel@tonic-gate /*
30011042SErik.Nordmark@Sun.COM * Add a fully initialized IPv6 IRE to the forwarding table.
30111042SErik.Nordmark@Sun.COM * This returns NULL on failure, or a held IRE on success.
30211042SErik.Nordmark@Sun.COM * Normally the returned IRE is the same as the argument. But a different
30311042SErik.Nordmark@Sun.COM * IRE will be returned if the added IRE is deemed identical to an existing
30411042SErik.Nordmark@Sun.COM * one. In that case ire_identical_ref will be increased.
30511042SErik.Nordmark@Sun.COM * The caller always needs to do an ire_refrele() on the returned IRE.
3060Sstevel@tonic-gate */
30711042SErik.Nordmark@Sun.COM ire_t *
ire_add_v6(ire_t * ire)30811042SErik.Nordmark@Sun.COM ire_add_v6(ire_t *ire)
3090Sstevel@tonic-gate {
3100Sstevel@tonic-gate ire_t *ire1;
3110Sstevel@tonic-gate int mask_table_index;
3120Sstevel@tonic-gate irb_t *irb_ptr;
3130Sstevel@tonic-gate ire_t **irep;
31411042SErik.Nordmark@Sun.COM int match_flags;
3150Sstevel@tonic-gate int error;
3163448Sdh155122 ip_stack_t *ipst = ire->ire_ipst;
3170Sstevel@tonic-gate
3180Sstevel@tonic-gate ASSERT(ire->ire_ipversion == IPV6_VERSION);
3190Sstevel@tonic-gate
3200Sstevel@tonic-gate /* Make sure the address is properly masked. */
3210Sstevel@tonic-gate V6_MASK_COPY(ire->ire_addr_v6, ire->ire_mask_v6, ire->ire_addr_v6);
3220Sstevel@tonic-gate
32311042SErik.Nordmark@Sun.COM mask_table_index = ip_mask_to_plen_v6(&ire->ire_mask_v6);
32411042SErik.Nordmark@Sun.COM if ((ipst->ips_ip_forwarding_table_v6[mask_table_index]) == NULL) {
32511042SErik.Nordmark@Sun.COM irb_t *ptr;
32611042SErik.Nordmark@Sun.COM int i;
3270Sstevel@tonic-gate
32811042SErik.Nordmark@Sun.COM ptr = (irb_t *)mi_zalloc((ipst->ips_ip6_ftable_hash_size *
32911042SErik.Nordmark@Sun.COM sizeof (irb_t)));
33011042SErik.Nordmark@Sun.COM if (ptr == NULL) {
33111042SErik.Nordmark@Sun.COM ire_delete(ire);
33211042SErik.Nordmark@Sun.COM return (NULL);
33311042SErik.Nordmark@Sun.COM }
33411042SErik.Nordmark@Sun.COM for (i = 0; i < ipst->ips_ip6_ftable_hash_size; i++) {
33511042SErik.Nordmark@Sun.COM rw_init(&ptr[i].irb_lock, NULL, RW_DEFAULT, NULL);
33611463SSowmini.Varadhan@Sun.COM ptr[i].irb_ipst = ipst;
3370Sstevel@tonic-gate }
33811042SErik.Nordmark@Sun.COM mutex_enter(&ipst->ips_ire_ft_init_lock);
33911042SErik.Nordmark@Sun.COM if (ipst->ips_ip_forwarding_table_v6[mask_table_index] ==
34011042SErik.Nordmark@Sun.COM NULL) {
34111042SErik.Nordmark@Sun.COM ipst->ips_ip_forwarding_table_v6[mask_table_index] =
34211042SErik.Nordmark@Sun.COM ptr;
34311042SErik.Nordmark@Sun.COM mutex_exit(&ipst->ips_ire_ft_init_lock);
34411042SErik.Nordmark@Sun.COM } else {
34511042SErik.Nordmark@Sun.COM /*
34611042SErik.Nordmark@Sun.COM * Some other thread won the race in
34711042SErik.Nordmark@Sun.COM * initializing the forwarding table at the
34811042SErik.Nordmark@Sun.COM * same index.
34911042SErik.Nordmark@Sun.COM */
35011042SErik.Nordmark@Sun.COM mutex_exit(&ipst->ips_ire_ft_init_lock);
35111042SErik.Nordmark@Sun.COM for (i = 0; i < ipst->ips_ip6_ftable_hash_size; i++) {
35211042SErik.Nordmark@Sun.COM rw_destroy(&ptr[i].irb_lock);
35311042SErik.Nordmark@Sun.COM }
35411042SErik.Nordmark@Sun.COM mi_free(ptr);
3550Sstevel@tonic-gate }
3560Sstevel@tonic-gate }
35711042SErik.Nordmark@Sun.COM irb_ptr = &(ipst->ips_ip_forwarding_table_v6[mask_table_index][
35811042SErik.Nordmark@Sun.COM IRE_ADDR_MASK_HASH_V6(ire->ire_addr_v6, ire->ire_mask_v6,
35911042SErik.Nordmark@Sun.COM ipst->ips_ip6_ftable_hash_size)]);
3600Sstevel@tonic-gate
36111042SErik.Nordmark@Sun.COM match_flags = (MATCH_IRE_MASK | MATCH_IRE_TYPE | MATCH_IRE_GW);
36211042SErik.Nordmark@Sun.COM if (ire->ire_ill != NULL)
36311042SErik.Nordmark@Sun.COM match_flags |= MATCH_IRE_ILL;
3640Sstevel@tonic-gate /*
36511042SErik.Nordmark@Sun.COM * Start the atomic add of the ire. Grab the bucket lock and the
36611042SErik.Nordmark@Sun.COM * ill lock. Check for condemned.
3670Sstevel@tonic-gate */
36811042SErik.Nordmark@Sun.COM error = ire_atomic_start(irb_ptr, ire);
36911042SErik.Nordmark@Sun.COM if (error != 0) {
37011042SErik.Nordmark@Sun.COM ire_delete(ire);
37111042SErik.Nordmark@Sun.COM return (NULL);
37211042SErik.Nordmark@Sun.COM }
3738485SPeter.Memishian@Sun.COM
3740Sstevel@tonic-gate /*
3758485SPeter.Memishian@Sun.COM * If we are creating a hidden IRE, make sure we search for
3768485SPeter.Memishian@Sun.COM * hidden IREs when searching for duplicates below.
3778485SPeter.Memishian@Sun.COM * Otherwise, we might find an IRE on some other interface
3788485SPeter.Memishian@Sun.COM * that's not marked hidden.
3790Sstevel@tonic-gate */
38011042SErik.Nordmark@Sun.COM if (ire->ire_testhidden)
38111042SErik.Nordmark@Sun.COM match_flags |= MATCH_IRE_TESTHIDDEN;
3820Sstevel@tonic-gate
3830Sstevel@tonic-gate /*
3840Sstevel@tonic-gate * Atomically check for duplicate and insert in the table.
3850Sstevel@tonic-gate */
3860Sstevel@tonic-gate for (ire1 = irb_ptr->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) {
38711042SErik.Nordmark@Sun.COM if (IRE_IS_CONDEMNED(ire1))
38811042SErik.Nordmark@Sun.COM continue;
38911042SErik.Nordmark@Sun.COM /*
39011042SErik.Nordmark@Sun.COM * Here we need an exact match on zoneid, i.e.,
39111042SErik.Nordmark@Sun.COM * ire_match_args doesn't fit.
39211042SErik.Nordmark@Sun.COM */
39311042SErik.Nordmark@Sun.COM if (ire1->ire_zoneid != ire->ire_zoneid)
39411042SErik.Nordmark@Sun.COM continue;
39511042SErik.Nordmark@Sun.COM
39611042SErik.Nordmark@Sun.COM if (ire1->ire_type != ire->ire_type)
3970Sstevel@tonic-gate continue;
3980Sstevel@tonic-gate
39911042SErik.Nordmark@Sun.COM /*
40011042SErik.Nordmark@Sun.COM * Note: We do not allow multiple routes that differ only
40111042SErik.Nordmark@Sun.COM * in the gateway security attributes; such routes are
40211042SErik.Nordmark@Sun.COM * considered duplicates.
40311042SErik.Nordmark@Sun.COM * To change that we explicitly have to treat them as
40411042SErik.Nordmark@Sun.COM * different here.
40511042SErik.Nordmark@Sun.COM */
4060Sstevel@tonic-gate if (ire_match_args_v6(ire1, &ire->ire_addr_v6,
4070Sstevel@tonic-gate &ire->ire_mask_v6, &ire->ire_gateway_addr_v6,
40811042SErik.Nordmark@Sun.COM ire->ire_type, ire->ire_ill, ire->ire_zoneid, NULL,
40911042SErik.Nordmark@Sun.COM match_flags)) {
4100Sstevel@tonic-gate /*
4110Sstevel@tonic-gate * Return the old ire after doing a REFHOLD.
4120Sstevel@tonic-gate * As most of the callers continue to use the IRE
4130Sstevel@tonic-gate * after adding, we return a held ire. This will
4140Sstevel@tonic-gate * avoid a lookup in the caller again. If the callers
4150Sstevel@tonic-gate * don't want to use it, they need to do a REFRELE.
41612445SSowmini.Varadhan@Sun.COM *
41712445SSowmini.Varadhan@Sun.COM * We only allow exactly one IRE_IF_CLONE for any dst,
41812445SSowmini.Varadhan@Sun.COM * so, if the is an IF_CLONE, return the ire without
41912445SSowmini.Varadhan@Sun.COM * an identical_ref, but with an ire_ref held.
4200Sstevel@tonic-gate */
42112445SSowmini.Varadhan@Sun.COM if (ire->ire_type != IRE_IF_CLONE) {
42212445SSowmini.Varadhan@Sun.COM atomic_add_32(&ire1->ire_identical_ref, 1);
42312445SSowmini.Varadhan@Sun.COM DTRACE_PROBE2(ire__add__exist, ire_t *, ire1,
42412445SSowmini.Varadhan@Sun.COM ire_t *, ire);
42512445SSowmini.Varadhan@Sun.COM }
4260Sstevel@tonic-gate ip1dbg(("found dup ire existing %p new %p",
4270Sstevel@tonic-gate (void *)ire1, (void *)ire));
42811042SErik.Nordmark@Sun.COM ire_refhold(ire1);
4290Sstevel@tonic-gate ire_atomic_end(irb_ptr, ire);
4300Sstevel@tonic-gate ire_delete(ire);
43111042SErik.Nordmark@Sun.COM return (ire1);
4320Sstevel@tonic-gate }
4330Sstevel@tonic-gate }
4340Sstevel@tonic-gate
43511042SErik.Nordmark@Sun.COM /*
43611042SErik.Nordmark@Sun.COM * Normally we do head insertion since most things do not care about
43711042SErik.Nordmark@Sun.COM * the order of the IREs in the bucket.
43811042SErik.Nordmark@Sun.COM * However, due to shared-IP zones (and restrict_interzone_loopback)
43911042SErik.Nordmark@Sun.COM * we can have an IRE_LOCAL as well as IRE_IF_CLONE for the same
44011042SErik.Nordmark@Sun.COM * address. For that reason we do tail insertion for IRE_IF_CLONE.
44111042SErik.Nordmark@Sun.COM */
44211042SErik.Nordmark@Sun.COM irep = (ire_t **)irb_ptr;
44311042SErik.Nordmark@Sun.COM if (ire->ire_type & IRE_IF_CLONE) {
44411042SErik.Nordmark@Sun.COM while ((ire1 = *irep) != NULL)
44511042SErik.Nordmark@Sun.COM irep = &ire1->ire_next;
4460Sstevel@tonic-gate }
4470Sstevel@tonic-gate /* Insert at *irep */
4480Sstevel@tonic-gate ire1 = *irep;
4490Sstevel@tonic-gate if (ire1 != NULL)
4500Sstevel@tonic-gate ire1->ire_ptpn = &ire->ire_next;
4510Sstevel@tonic-gate ire->ire_next = ire1;
4520Sstevel@tonic-gate /* Link the new one in. */
4530Sstevel@tonic-gate ire->ire_ptpn = irep;
4540Sstevel@tonic-gate /*
4550Sstevel@tonic-gate * ire_walk routines de-reference ire_next without holding
4560Sstevel@tonic-gate * a lock. Before we point to the new ire, we want to make
4570Sstevel@tonic-gate * sure the store that sets the ire_next of the new ire
4580Sstevel@tonic-gate * reaches global visibility, so that ire_walk routines
4590Sstevel@tonic-gate * don't see a truncated list of ires i.e if the ire_next
4600Sstevel@tonic-gate * of the new ire gets set after we do "*irep = ire" due
4610Sstevel@tonic-gate * to re-ordering, the ire_walk thread will see a NULL
4620Sstevel@tonic-gate * once it accesses the ire_next of the new ire.
4630Sstevel@tonic-gate * membar_producer() makes sure that the following store
4640Sstevel@tonic-gate * happens *after* all of the above stores.
4650Sstevel@tonic-gate */
4660Sstevel@tonic-gate membar_producer();
4670Sstevel@tonic-gate *irep = ire;
4680Sstevel@tonic-gate ire->ire_bucket = irb_ptr;
4690Sstevel@tonic-gate /*
4700Sstevel@tonic-gate * We return a bumped up IRE above. Keep it symmetrical
4710Sstevel@tonic-gate * so that the callers will always have to release. This
4720Sstevel@tonic-gate * helps the callers of this function because they continue
4730Sstevel@tonic-gate * to use the IRE after adding and hence they don't have to
4740Sstevel@tonic-gate * lookup again after we return the IRE.
4750Sstevel@tonic-gate *
4760Sstevel@tonic-gate * NOTE : We don't have to use atomics as this is appearing
4770Sstevel@tonic-gate * in the list for the first time and no one else can bump
4780Sstevel@tonic-gate * up the reference count on this yet.
4790Sstevel@tonic-gate */
48011042SErik.Nordmark@Sun.COM ire_refhold_locked(ire);
4813448Sdh155122 BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_inserted);
4820Sstevel@tonic-gate irb_ptr->irb_ire_cnt++;
4830Sstevel@tonic-gate
48411042SErik.Nordmark@Sun.COM if (ire->ire_ill != NULL) {
48511042SErik.Nordmark@Sun.COM DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ire->ire_ill,
4866255Ssowmini (char *), "ire", (void *), ire);
48711042SErik.Nordmark@Sun.COM ire->ire_ill->ill_ire_cnt++;
48811042SErik.Nordmark@Sun.COM ASSERT(ire->ire_ill->ill_ire_cnt != 0); /* Wraparound */
4890Sstevel@tonic-gate }
4900Sstevel@tonic-gate ire_atomic_end(irb_ptr, ire);
4910Sstevel@tonic-gate
49211042SErik.Nordmark@Sun.COM /* Make any caching of the IREs be notified or updated */
49311042SErik.Nordmark@Sun.COM ire_flush_cache_v6(ire, IRE_FLUSH_ADD);
4940Sstevel@tonic-gate
49511042SErik.Nordmark@Sun.COM return (ire);
4960Sstevel@tonic-gate }
4970Sstevel@tonic-gate
4980Sstevel@tonic-gate /*
4990Sstevel@tonic-gate * Search for all HOST REDIRECT routes that are
5000Sstevel@tonic-gate * pointing at the specified gateway and
5010Sstevel@tonic-gate * delete them. This routine is called only
5020Sstevel@tonic-gate * when a default gateway is going away.
5030Sstevel@tonic-gate */
5040Sstevel@tonic-gate static void
ire_delete_host_redirects_v6(const in6_addr_t * gateway,ip_stack_t * ipst)5053448Sdh155122 ire_delete_host_redirects_v6(const in6_addr_t *gateway, ip_stack_t *ipst)
5060Sstevel@tonic-gate {
5070Sstevel@tonic-gate irb_t *irb_ptr;
5080Sstevel@tonic-gate irb_t *irb;
5090Sstevel@tonic-gate ire_t *ire;
5100Sstevel@tonic-gate in6_addr_t gw_addr_v6;
5110Sstevel@tonic-gate int i;
5120Sstevel@tonic-gate
5130Sstevel@tonic-gate /* get the hash table for HOST routes */
5143448Sdh155122 irb_ptr = ipst->ips_ip_forwarding_table_v6[(IP6_MASK_TABLE_SIZE - 1)];
5150Sstevel@tonic-gate if (irb_ptr == NULL)
5160Sstevel@tonic-gate return;
5173448Sdh155122 for (i = 0; (i < ipst->ips_ip6_ftable_hash_size); i++) {
5180Sstevel@tonic-gate irb = &irb_ptr[i];
51911042SErik.Nordmark@Sun.COM irb_refhold(irb);
5200Sstevel@tonic-gate for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
5213004Sdd193516 if (!(ire->ire_flags & RTF_DYNAMIC))
5220Sstevel@tonic-gate continue;
5230Sstevel@tonic-gate mutex_enter(&ire->ire_lock);
5240Sstevel@tonic-gate gw_addr_v6 = ire->ire_gateway_addr_v6;
5250Sstevel@tonic-gate mutex_exit(&ire->ire_lock);
5260Sstevel@tonic-gate if (IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway))
5270Sstevel@tonic-gate ire_delete(ire);
5280Sstevel@tonic-gate }
52911042SErik.Nordmark@Sun.COM irb_refrele(irb);
5300Sstevel@tonic-gate }
5310Sstevel@tonic-gate }
5320Sstevel@tonic-gate
5330Sstevel@tonic-gate /*
5340Sstevel@tonic-gate * Delete the specified IRE.
5350Sstevel@tonic-gate * All calls should use ire_delete().
5360Sstevel@tonic-gate * Sometimes called as writer though not required by this function.
5370Sstevel@tonic-gate *
5380Sstevel@tonic-gate * NOTE : This function is called only if the ire was added
5390Sstevel@tonic-gate * in the list.
5400Sstevel@tonic-gate */
5410Sstevel@tonic-gate void
ire_delete_v6(ire_t * ire)5420Sstevel@tonic-gate ire_delete_v6(ire_t *ire)
5430Sstevel@tonic-gate {
5440Sstevel@tonic-gate in6_addr_t gw_addr_v6;
5453448Sdh155122 ip_stack_t *ipst = ire->ire_ipst;
5460Sstevel@tonic-gate
54711042SErik.Nordmark@Sun.COM /*
54811042SErik.Nordmark@Sun.COM * Make sure ire_generation increases from ire_flush_cache happen
54911042SErik.Nordmark@Sun.COM * after any lookup/reader has read ire_generation.
55011042SErik.Nordmark@Sun.COM * Since the rw_enter makes us wait until any lookup/reader has
55111042SErik.Nordmark@Sun.COM * completed we can exit the lock immediately.
55211042SErik.Nordmark@Sun.COM */
55311042SErik.Nordmark@Sun.COM rw_enter(&ipst->ips_ip6_ire_head_lock, RW_WRITER);
55411042SErik.Nordmark@Sun.COM rw_exit(&ipst->ips_ip6_ire_head_lock);
55511042SErik.Nordmark@Sun.COM
5560Sstevel@tonic-gate ASSERT(ire->ire_refcnt >= 1);
5570Sstevel@tonic-gate ASSERT(ire->ire_ipversion == IPV6_VERSION);
5580Sstevel@tonic-gate
55911042SErik.Nordmark@Sun.COM ire_flush_cache_v6(ire, IRE_FLUSH_DELETE);
56011042SErik.Nordmark@Sun.COM
5610Sstevel@tonic-gate if (ire->ire_type == IRE_DEFAULT) {
5620Sstevel@tonic-gate /*
5630Sstevel@tonic-gate * when a default gateway is going away
5640Sstevel@tonic-gate * delete all the host redirects pointing at that
5650Sstevel@tonic-gate * gateway.
5660Sstevel@tonic-gate */
5670Sstevel@tonic-gate mutex_enter(&ire->ire_lock);
5680Sstevel@tonic-gate gw_addr_v6 = ire->ire_gateway_addr_v6;
5690Sstevel@tonic-gate mutex_exit(&ire->ire_lock);
5703448Sdh155122 ire_delete_host_redirects_v6(&gw_addr_v6, ipst);
5710Sstevel@tonic-gate }
57211042SErik.Nordmark@Sun.COM
57311042SErik.Nordmark@Sun.COM /*
57411042SErik.Nordmark@Sun.COM * If we are deleting an IRE_INTERFACE then we make sure we also
57511042SErik.Nordmark@Sun.COM * delete any IRE_IF_CLONE that has been created from it.
57611042SErik.Nordmark@Sun.COM * Those are always in ire_dep_children.
57711042SErik.Nordmark@Sun.COM */
57811042SErik.Nordmark@Sun.COM if ((ire->ire_type & IRE_INTERFACE) && ire->ire_dep_children != 0)
57911042SErik.Nordmark@Sun.COM ire_dep_delete_if_clone(ire);
5800Sstevel@tonic-gate
58111042SErik.Nordmark@Sun.COM /* Remove from parent dependencies and child */
58211042SErik.Nordmark@Sun.COM rw_enter(&ipst->ips_ire_dep_lock, RW_WRITER);
58311042SErik.Nordmark@Sun.COM if (ire->ire_dep_parent != NULL) {
58411042SErik.Nordmark@Sun.COM ire_dep_remove(ire);
5850Sstevel@tonic-gate }
58611042SErik.Nordmark@Sun.COM while (ire->ire_dep_children != NULL)
58711042SErik.Nordmark@Sun.COM ire_dep_remove(ire->ire_dep_children);
58811042SErik.Nordmark@Sun.COM rw_exit(&ipst->ips_ire_dep_lock);
5890Sstevel@tonic-gate }
5900Sstevel@tonic-gate
5910Sstevel@tonic-gate /*
59211042SErik.Nordmark@Sun.COM * When an IRE is added or deleted this routine is called to make sure
59311042SErik.Nordmark@Sun.COM * any caching of IRE information is notified or updated.
5940Sstevel@tonic-gate *
59511042SErik.Nordmark@Sun.COM * The flag argument indicates if the flush request is due to addition
59611042SErik.Nordmark@Sun.COM * of new route (IRE_FLUSH_ADD), deletion of old route (IRE_FLUSH_DELETE),
59711042SErik.Nordmark@Sun.COM * or a change to ire_gateway_addr (IRE_FLUSH_GWCHANGE).
5980Sstevel@tonic-gate */
5990Sstevel@tonic-gate void
ire_flush_cache_v6(ire_t * ire,int flag)6000Sstevel@tonic-gate ire_flush_cache_v6(ire_t *ire, int flag)
6010Sstevel@tonic-gate {
60211042SErik.Nordmark@Sun.COM ip_stack_t *ipst = ire->ire_ipst;
6030Sstevel@tonic-gate
60411042SErik.Nordmark@Sun.COM /*
60511042SErik.Nordmark@Sun.COM * IRE_IF_CLONE ire's don't provide any new information
60611042SErik.Nordmark@Sun.COM * than the parent from which they are cloned, so don't
60711042SErik.Nordmark@Sun.COM * perturb the generation numbers.
60811042SErik.Nordmark@Sun.COM */
60911042SErik.Nordmark@Sun.COM if (ire->ire_type & IRE_IF_CLONE)
6104714Ssowmini return;
6110Sstevel@tonic-gate
6120Sstevel@tonic-gate /*
61311042SErik.Nordmark@Sun.COM * Ensure that an ire_add during a lookup serializes the updates of
61411042SErik.Nordmark@Sun.COM * the generation numbers under ire_head_lock so that the lookup gets
61511042SErik.Nordmark@Sun.COM * either the old ire and old generation number, or a new ire and new
61611042SErik.Nordmark@Sun.COM * generation number.
6170Sstevel@tonic-gate */
61811042SErik.Nordmark@Sun.COM rw_enter(&ipst->ips_ip6_ire_head_lock, RW_WRITER);
61911042SErik.Nordmark@Sun.COM
62011042SErik.Nordmark@Sun.COM /*
62111042SErik.Nordmark@Sun.COM * If a route was just added, we need to notify everybody that
62211042SErik.Nordmark@Sun.COM * has cached an IRE_NOROUTE since there might now be a better
62311042SErik.Nordmark@Sun.COM * route for them.
62411042SErik.Nordmark@Sun.COM */
62511042SErik.Nordmark@Sun.COM if (flag == IRE_FLUSH_ADD) {
62611042SErik.Nordmark@Sun.COM ire_increment_generation(ipst->ips_ire_reject_v6);
62711042SErik.Nordmark@Sun.COM ire_increment_generation(ipst->ips_ire_blackhole_v6);
62811042SErik.Nordmark@Sun.COM }
62911042SErik.Nordmark@Sun.COM
63011042SErik.Nordmark@Sun.COM /* Adding a default can't otherwise provide a better route */
63111042SErik.Nordmark@Sun.COM if (ire->ire_type == IRE_DEFAULT && flag == IRE_FLUSH_ADD) {
63211042SErik.Nordmark@Sun.COM rw_exit(&ipst->ips_ip6_ire_head_lock);
6330Sstevel@tonic-gate return;
63411042SErik.Nordmark@Sun.COM }
63511042SErik.Nordmark@Sun.COM
63611042SErik.Nordmark@Sun.COM switch (flag) {
63711042SErik.Nordmark@Sun.COM case IRE_FLUSH_DELETE:
63811042SErik.Nordmark@Sun.COM case IRE_FLUSH_GWCHANGE:
6390Sstevel@tonic-gate /*
64011042SErik.Nordmark@Sun.COM * Update ire_generation for all ire_dep_children chains
64111042SErik.Nordmark@Sun.COM * starting with this IRE
6420Sstevel@tonic-gate */
64311042SErik.Nordmark@Sun.COM ire_dep_incr_generation(ire);
64411042SErik.Nordmark@Sun.COM break;
64511042SErik.Nordmark@Sun.COM case IRE_FLUSH_ADD: {
64611042SErik.Nordmark@Sun.COM in6_addr_t addr;
64711042SErik.Nordmark@Sun.COM in6_addr_t mask;
64811042SErik.Nordmark@Sun.COM ip_stack_t *ipst = ire->ire_ipst;
64911042SErik.Nordmark@Sun.COM uint_t masklen;
65011042SErik.Nordmark@Sun.COM
65111042SErik.Nordmark@Sun.COM /*
65211042SErik.Nordmark@Sun.COM * Find an IRE which is a shorter match than the ire to be added
65311042SErik.Nordmark@Sun.COM * For any such IRE (which we repeat) we update the
65411042SErik.Nordmark@Sun.COM * ire_generation the same way as in the delete case.
65511042SErik.Nordmark@Sun.COM */
65611042SErik.Nordmark@Sun.COM addr = ire->ire_addr_v6;
65711042SErik.Nordmark@Sun.COM mask = ire->ire_mask_v6;
65811042SErik.Nordmark@Sun.COM masklen = ip_mask_to_plen_v6(&mask);
65911042SErik.Nordmark@Sun.COM
66011042SErik.Nordmark@Sun.COM ire = ire_ftable_lookup_impl_v6(&addr, &mask, NULL, 0, NULL,
66111042SErik.Nordmark@Sun.COM ALL_ZONES, NULL, MATCH_IRE_SHORTERMASK, ipst);
66211042SErik.Nordmark@Sun.COM while (ire != NULL) {
66311042SErik.Nordmark@Sun.COM /* We need to handle all in the same bucket */
66411042SErik.Nordmark@Sun.COM irb_increment_generation(ire->ire_bucket);
66511042SErik.Nordmark@Sun.COM
66611042SErik.Nordmark@Sun.COM mask = ire->ire_mask_v6;
66711042SErik.Nordmark@Sun.COM ASSERT(masklen > ip_mask_to_plen_v6(&mask));
66811042SErik.Nordmark@Sun.COM masklen = ip_mask_to_plen_v6(&mask);
66911042SErik.Nordmark@Sun.COM ire_refrele(ire);
67011042SErik.Nordmark@Sun.COM ire = ire_ftable_lookup_impl_v6(&addr, &mask, NULL, 0,
67111042SErik.Nordmark@Sun.COM NULL, ALL_ZONES, NULL, MATCH_IRE_SHORTERMASK, ipst);
6720Sstevel@tonic-gate }
6730Sstevel@tonic-gate }
67411042SErik.Nordmark@Sun.COM break;
6750Sstevel@tonic-gate }
67611042SErik.Nordmark@Sun.COM rw_exit(&ipst->ips_ip6_ire_head_lock);
6770Sstevel@tonic-gate }
6780Sstevel@tonic-gate
6790Sstevel@tonic-gate /*
6800Sstevel@tonic-gate * Matches the arguments passed with the values in the ire.
6810Sstevel@tonic-gate *
68211042SErik.Nordmark@Sun.COM * Note: for match types that match using "ill" passed in, ill
6830Sstevel@tonic-gate * must be checked for non-NULL before calling this routine.
6840Sstevel@tonic-gate */
68511042SErik.Nordmark@Sun.COM boolean_t
ire_match_args_v6(ire_t * ire,const in6_addr_t * addr,const in6_addr_t * mask,const in6_addr_t * gateway,int type,const ill_t * ill,zoneid_t zoneid,const ts_label_t * tsl,int match_flags)6860Sstevel@tonic-gate ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, const in6_addr_t *mask,
68711042SErik.Nordmark@Sun.COM const in6_addr_t *gateway, int type, const ill_t *ill, zoneid_t zoneid,
68811042SErik.Nordmark@Sun.COM const ts_label_t *tsl, int match_flags)
6890Sstevel@tonic-gate {
6900Sstevel@tonic-gate in6_addr_t masked_addr;
6910Sstevel@tonic-gate in6_addr_t gw_addr_v6;
6920Sstevel@tonic-gate ill_t *ire_ill = NULL, *dst_ill;
69311042SErik.Nordmark@Sun.COM ip_stack_t *ipst = ire->ire_ipst;
6940Sstevel@tonic-gate
6950Sstevel@tonic-gate ASSERT(ire->ire_ipversion == IPV6_VERSION);
6960Sstevel@tonic-gate ASSERT(addr != NULL);
6970Sstevel@tonic-gate ASSERT(mask != NULL);
6980Sstevel@tonic-gate ASSERT((!(match_flags & MATCH_IRE_GW)) || gateway != NULL);
69911681SSowmini.Varadhan@Sun.COM ASSERT((!(match_flags & (MATCH_IRE_ILL|MATCH_IRE_SRC_ILL))) ||
70011042SErik.Nordmark@Sun.COM (ill != NULL && ill->ill_isv6));
7010Sstevel@tonic-gate
7020Sstevel@tonic-gate /*
70311042SErik.Nordmark@Sun.COM * If MATCH_IRE_TESTHIDDEN is set, then only return the IRE if it
70411042SErik.Nordmark@Sun.COM * is in fact hidden, to ensure the caller gets the right one.
7050Sstevel@tonic-gate */
70611042SErik.Nordmark@Sun.COM if (ire->ire_testhidden) {
70711042SErik.Nordmark@Sun.COM if (!(match_flags & MATCH_IRE_TESTHIDDEN))
7088485SPeter.Memishian@Sun.COM return (B_FALSE);
7098485SPeter.Memishian@Sun.COM }
7100Sstevel@tonic-gate
7111676Sjpk if (zoneid != ALL_ZONES && zoneid != ire->ire_zoneid &&
7121676Sjpk ire->ire_zoneid != ALL_ZONES) {
7130Sstevel@tonic-gate /*
71411042SErik.Nordmark@Sun.COM * If MATCH_IRE_ZONEONLY has been set and the supplied zoneid
71511042SErik.Nordmark@Sun.COM * does not match that of ire_zoneid, a failure to
7160Sstevel@tonic-gate * match is reported at this point. Otherwise, since some IREs
7170Sstevel@tonic-gate * that are available in the global zone can be used in local
7180Sstevel@tonic-gate * zones, additional checks need to be performed:
7190Sstevel@tonic-gate *
72011042SErik.Nordmark@Sun.COM * IRE_LOOPBACK
72111042SErik.Nordmark@Sun.COM * entries should never be matched in this situation.
72211042SErik.Nordmark@Sun.COM * Each zone has its own IRE_LOOPBACK.
7230Sstevel@tonic-gate *
72411042SErik.Nordmark@Sun.COM * IRE_LOCAL
72511042SErik.Nordmark@Sun.COM * We allow them for any zoneid. ire_route_recursive
72611042SErik.Nordmark@Sun.COM * does additional checks when
72711042SErik.Nordmark@Sun.COM * ip_restrict_interzone_loopback is set.
7280Sstevel@tonic-gate *
72911042SErik.Nordmark@Sun.COM * If ill_usesrc_ifindex is set
73011042SErik.Nordmark@Sun.COM * Then we check if the zone has a valid source address
73111042SErik.Nordmark@Sun.COM * on the usesrc ill.
7320Sstevel@tonic-gate *
73311042SErik.Nordmark@Sun.COM * If ire_ill is set, then check that the zone has an ipif
73411042SErik.Nordmark@Sun.COM * on that ill.
73511042SErik.Nordmark@Sun.COM *
73611042SErik.Nordmark@Sun.COM * Outside of this function (in ire_round_robin) we check
73711042SErik.Nordmark@Sun.COM * that any IRE_OFFLINK has a gateway that reachable from the
73811042SErik.Nordmark@Sun.COM * zone when we have multiple choices (ECMP).
7390Sstevel@tonic-gate */
7400Sstevel@tonic-gate if (match_flags & MATCH_IRE_ZONEONLY)
7410Sstevel@tonic-gate return (B_FALSE);
74211042SErik.Nordmark@Sun.COM if (ire->ire_type & IRE_LOOPBACK)
7430Sstevel@tonic-gate return (B_FALSE);
74411042SErik.Nordmark@Sun.COM
74511042SErik.Nordmark@Sun.COM if (ire->ire_type & IRE_LOCAL)
74611042SErik.Nordmark@Sun.COM goto matchit;
74711042SErik.Nordmark@Sun.COM
7480Sstevel@tonic-gate /*
74911042SErik.Nordmark@Sun.COM * The normal case of IRE_ONLINK has a matching zoneid.
75011042SErik.Nordmark@Sun.COM * Here we handle the case when shared-IP zones have been
75111042SErik.Nordmark@Sun.COM * configured with IP addresses on vniN. In that case it
75211042SErik.Nordmark@Sun.COM * is ok for traffic from a zone to use IRE_ONLINK routes
75311042SErik.Nordmark@Sun.COM * if the ill has a usesrc pointing at vniN
75411042SErik.Nordmark@Sun.COM * Applies to IRE_INTERFACE.
7550Sstevel@tonic-gate */
75611042SErik.Nordmark@Sun.COM dst_ill = ire->ire_ill;
75711042SErik.Nordmark@Sun.COM if (ire->ire_type & IRE_ONLINK) {
75811042SErik.Nordmark@Sun.COM uint_t ifindex;
75911042SErik.Nordmark@Sun.COM
76011042SErik.Nordmark@Sun.COM /*
76111042SErik.Nordmark@Sun.COM * Note there is no IRE_INTERFACE on vniN thus
76211042SErik.Nordmark@Sun.COM * can't do an IRE lookup for a matching route.
76311042SErik.Nordmark@Sun.COM */
76411042SErik.Nordmark@Sun.COM ifindex = dst_ill->ill_usesrc_ifindex;
76511042SErik.Nordmark@Sun.COM if (ifindex == 0)
76611042SErik.Nordmark@Sun.COM return (B_FALSE);
76711042SErik.Nordmark@Sun.COM
7680Sstevel@tonic-gate /*
7690Sstevel@tonic-gate * If there is a usable source address in the
77011042SErik.Nordmark@Sun.COM * zone, then it's ok to return this IRE_INTERFACE
7710Sstevel@tonic-gate */
77211042SErik.Nordmark@Sun.COM if (!ipif_zone_avail(ifindex, dst_ill->ill_isv6,
77311042SErik.Nordmark@Sun.COM zoneid, ipst)) {
77411042SErik.Nordmark@Sun.COM ip3dbg(("ire_match_args: no usrsrc for zone"
7750Sstevel@tonic-gate " dst_ill %p\n", (void *)dst_ill));
7760Sstevel@tonic-gate return (B_FALSE);
7770Sstevel@tonic-gate }
7780Sstevel@tonic-gate }
77911042SErik.Nordmark@Sun.COM /*
78011681SSowmini.Varadhan@Sun.COM * For example, with
78111042SErik.Nordmark@Sun.COM * route add 11.0.0.0 gw1 -ifp bge0
78211042SErik.Nordmark@Sun.COM * route add 11.0.0.0 gw2 -ifp bge1
78311042SErik.Nordmark@Sun.COM * this code would differentiate based on
78411042SErik.Nordmark@Sun.COM * where the sending zone has addresses.
78511042SErik.Nordmark@Sun.COM * Only if the zone has an address on bge0 can it use the first
78611042SErik.Nordmark@Sun.COM * route. It isn't clear if this behavior is documented
78711042SErik.Nordmark@Sun.COM * anywhere.
78811042SErik.Nordmark@Sun.COM */
78911042SErik.Nordmark@Sun.COM if (dst_ill != NULL && (ire->ire_type & IRE_OFFLINK)) {
7900Sstevel@tonic-gate ipif_t *tipif;
7910Sstevel@tonic-gate
79211042SErik.Nordmark@Sun.COM mutex_enter(&dst_ill->ill_lock);
79311042SErik.Nordmark@Sun.COM for (tipif = dst_ill->ill_ipif;
7940Sstevel@tonic-gate tipif != NULL; tipif = tipif->ipif_next) {
79511042SErik.Nordmark@Sun.COM if (!IPIF_IS_CONDEMNED(tipif) &&
7960Sstevel@tonic-gate (tipif->ipif_flags & IPIF_UP) &&
7971676Sjpk (tipif->ipif_zoneid == zoneid ||
7981676Sjpk tipif->ipif_zoneid == ALL_ZONES))
7990Sstevel@tonic-gate break;
8000Sstevel@tonic-gate }
80111042SErik.Nordmark@Sun.COM mutex_exit(&dst_ill->ill_lock);
8020Sstevel@tonic-gate if (tipif == NULL)
8030Sstevel@tonic-gate return (B_FALSE);
8040Sstevel@tonic-gate }
8050Sstevel@tonic-gate }
8060Sstevel@tonic-gate
80711042SErik.Nordmark@Sun.COM matchit:
80811681SSowmini.Varadhan@Sun.COM ire_ill = ire->ire_ill;
8090Sstevel@tonic-gate if (match_flags & MATCH_IRE_GW) {
8100Sstevel@tonic-gate mutex_enter(&ire->ire_lock);
8110Sstevel@tonic-gate gw_addr_v6 = ire->ire_gateway_addr_v6;
8120Sstevel@tonic-gate mutex_exit(&ire->ire_lock);
8130Sstevel@tonic-gate }
81411042SErik.Nordmark@Sun.COM if (match_flags & MATCH_IRE_ILL) {
8158485SPeter.Memishian@Sun.COM
81611042SErik.Nordmark@Sun.COM /*
81711042SErik.Nordmark@Sun.COM * If asked to match an ill, we *must* match
81811042SErik.Nordmark@Sun.COM * on the ire_ill for ipmp test addresses, or
81911042SErik.Nordmark@Sun.COM * any of the ill in the group for data addresses.
82011042SErik.Nordmark@Sun.COM * If we don't, we may as well fail.
82111042SErik.Nordmark@Sun.COM * However, we need an exception for IRE_LOCALs to ensure
82211042SErik.Nordmark@Sun.COM * we loopback packets even sent to test addresses on different
82311042SErik.Nordmark@Sun.COM * interfaces in the group.
82411042SErik.Nordmark@Sun.COM */
82511042SErik.Nordmark@Sun.COM if ((match_flags & MATCH_IRE_TESTHIDDEN) &&
82611042SErik.Nordmark@Sun.COM !(ire->ire_type & IRE_LOCAL)) {
82711042SErik.Nordmark@Sun.COM if (ire->ire_ill != ill)
82811042SErik.Nordmark@Sun.COM return (B_FALSE);
82911042SErik.Nordmark@Sun.COM } else {
83011042SErik.Nordmark@Sun.COM match_flags &= ~MATCH_IRE_TESTHIDDEN;
83111042SErik.Nordmark@Sun.COM /*
83211042SErik.Nordmark@Sun.COM * We know that ill is not NULL, but ire_ill could be
83311042SErik.Nordmark@Sun.COM * NULL
83411042SErik.Nordmark@Sun.COM */
83511042SErik.Nordmark@Sun.COM if (ire_ill == NULL || !IS_ON_SAME_LAN(ill, ire_ill))
83611042SErik.Nordmark@Sun.COM return (B_FALSE);
83711042SErik.Nordmark@Sun.COM }
8380Sstevel@tonic-gate }
83911681SSowmini.Varadhan@Sun.COM if (match_flags & MATCH_IRE_SRC_ILL) {
84011681SSowmini.Varadhan@Sun.COM if (ire_ill == NULL)
84111681SSowmini.Varadhan@Sun.COM return (B_FALSE);
84211681SSowmini.Varadhan@Sun.COM if (!IS_ON_SAME_LAN(ill, ire_ill)) {
84311681SSowmini.Varadhan@Sun.COM if (ire_ill->ill_usesrc_ifindex == 0 ||
84411681SSowmini.Varadhan@Sun.COM (ire_ill->ill_usesrc_ifindex !=
84511681SSowmini.Varadhan@Sun.COM ill->ill_phyint->phyint_ifindex))
84611681SSowmini.Varadhan@Sun.COM return (B_FALSE);
84711681SSowmini.Varadhan@Sun.COM }
84811681SSowmini.Varadhan@Sun.COM }
84911681SSowmini.Varadhan@Sun.COM
8500Sstevel@tonic-gate /* No ire_addr_v6 bits set past the mask */
8510Sstevel@tonic-gate ASSERT(V6_MASK_EQ(ire->ire_addr_v6, ire->ire_mask_v6,
8520Sstevel@tonic-gate ire->ire_addr_v6));
8530Sstevel@tonic-gate V6_MASK_COPY(*addr, *mask, masked_addr);
8540Sstevel@tonic-gate if (V6_MASK_EQ(*addr, *mask, ire->ire_addr_v6) &&
8550Sstevel@tonic-gate ((!(match_flags & MATCH_IRE_GW)) ||
85612038SSowmini.Varadhan@Sun.COM ((!(match_flags & MATCH_IRE_DIRECT)) ||
85712038SSowmini.Varadhan@Sun.COM !(ire->ire_flags & RTF_INDIRECT)) &&
8584714Ssowmini IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) &&
85911042SErik.Nordmark@Sun.COM ((!(match_flags & MATCH_IRE_TYPE)) || (ire->ire_type & type)) &&
86011042SErik.Nordmark@Sun.COM ((!(match_flags & MATCH_IRE_TESTHIDDEN)) || ire->ire_testhidden) &&
86111042SErik.Nordmark@Sun.COM ((!(match_flags & MATCH_IRE_MASK)) ||
86211042SErik.Nordmark@Sun.COM (IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, mask))) &&
8631676Sjpk ((!(match_flags & MATCH_IRE_SECATTR)) ||
8644714Ssowmini (!is_system_labeled()) ||
8654714Ssowmini (tsol_ire_match_gwattr(ire, tsl) == 0))) {
8660Sstevel@tonic-gate /* We found the matched IRE */
8670Sstevel@tonic-gate return (B_TRUE);
8680Sstevel@tonic-gate }
8690Sstevel@tonic-gate return (B_FALSE);
8700Sstevel@tonic-gate }
8710Sstevel@tonic-gate
8720Sstevel@tonic-gate /*
87311042SErik.Nordmark@Sun.COM * Check if the zoneid (not ALL_ZONES) has an IRE_INTERFACE for the specified
87411042SErik.Nordmark@Sun.COM * gateway address. If ill is non-NULL we also match on it.
87511042SErik.Nordmark@Sun.COM * The caller must hold a read lock on RADIX_NODE_HEAD if lock_held is set.
8760Sstevel@tonic-gate */
87711042SErik.Nordmark@Sun.COM boolean_t
ire_gateway_ok_zone_v6(const in6_addr_t * gateway,zoneid_t zoneid,ill_t * ill,const ts_label_t * tsl,ip_stack_t * ipst,boolean_t lock_held)87811042SErik.Nordmark@Sun.COM ire_gateway_ok_zone_v6(const in6_addr_t *gateway, zoneid_t zoneid, ill_t *ill,
87911042SErik.Nordmark@Sun.COM const ts_label_t *tsl, ip_stack_t *ipst, boolean_t lock_held)
8800Sstevel@tonic-gate {
88111042SErik.Nordmark@Sun.COM ire_t *ire;
88211042SErik.Nordmark@Sun.COM uint_t match_flags;
8830Sstevel@tonic-gate
88411042SErik.Nordmark@Sun.COM if (lock_held)
88511042SErik.Nordmark@Sun.COM ASSERT(RW_READ_HELD(&ipst->ips_ip6_ire_head_lock));
88611042SErik.Nordmark@Sun.COM else
88711042SErik.Nordmark@Sun.COM rw_enter(&ipst->ips_ip6_ire_head_lock, RW_READER);
8880Sstevel@tonic-gate
88911042SErik.Nordmark@Sun.COM match_flags = MATCH_IRE_TYPE | MATCH_IRE_SECATTR;
89011042SErik.Nordmark@Sun.COM if (ill != NULL)
89111042SErik.Nordmark@Sun.COM match_flags |= MATCH_IRE_ILL;
89211042SErik.Nordmark@Sun.COM
89311042SErik.Nordmark@Sun.COM ire = ire_ftable_lookup_impl_v6(gateway, &ipv6_all_zeros,
89411042SErik.Nordmark@Sun.COM &ipv6_all_zeros, IRE_INTERFACE, ill, zoneid, tsl, match_flags,
89511042SErik.Nordmark@Sun.COM ipst);
89611042SErik.Nordmark@Sun.COM
89711042SErik.Nordmark@Sun.COM if (!lock_held)
89811042SErik.Nordmark@Sun.COM rw_exit(&ipst->ips_ip6_ire_head_lock);
89911042SErik.Nordmark@Sun.COM if (ire != NULL) {
90011042SErik.Nordmark@Sun.COM ire_refrele(ire);
90111042SErik.Nordmark@Sun.COM return (B_TRUE);
90211042SErik.Nordmark@Sun.COM } else {
90311042SErik.Nordmark@Sun.COM return (B_FALSE);
9040Sstevel@tonic-gate }
9050Sstevel@tonic-gate }
9060Sstevel@tonic-gate
9070Sstevel@tonic-gate /*
9080Sstevel@tonic-gate * Lookup a route in forwarding table.
9090Sstevel@tonic-gate * specific lookup is indicated by passing the
9100Sstevel@tonic-gate * required parameters and indicating the
9110Sstevel@tonic-gate * match required in flag field.
9120Sstevel@tonic-gate *
9130Sstevel@tonic-gate * Supports link-local addresses by following the ipif/ill when recursing.
9140Sstevel@tonic-gate */
9150Sstevel@tonic-gate ire_t *
ire_ftable_lookup_v6(const in6_addr_t * addr,const in6_addr_t * mask,const in6_addr_t * gateway,int type,const ill_t * ill,zoneid_t zoneid,const ts_label_t * tsl,int flags,uint32_t xmit_hint,ip_stack_t * ipst,uint_t * generationp)9160Sstevel@tonic-gate ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask,
91711042SErik.Nordmark@Sun.COM const in6_addr_t *gateway, int type, const ill_t *ill,
91811042SErik.Nordmark@Sun.COM zoneid_t zoneid, const ts_label_t *tsl, int flags,
91911042SErik.Nordmark@Sun.COM uint32_t xmit_hint, ip_stack_t *ipst, uint_t *generationp)
9200Sstevel@tonic-gate {
9210Sstevel@tonic-gate ire_t *ire = NULL;
9220Sstevel@tonic-gate
9230Sstevel@tonic-gate ASSERT(addr != NULL);
9240Sstevel@tonic-gate ASSERT((!(flags & MATCH_IRE_MASK)) || mask != NULL);
9250Sstevel@tonic-gate ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL);
92611042SErik.Nordmark@Sun.COM ASSERT(ill == NULL || ill->ill_isv6);
92711042SErik.Nordmark@Sun.COM
92811042SErik.Nordmark@Sun.COM ASSERT(!IN6_IS_ADDR_V4MAPPED(addr));
92911042SErik.Nordmark@Sun.COM
93011042SErik.Nordmark@Sun.COM /*
93111042SErik.Nordmark@Sun.COM * ire_match_args_v6() will dereference ill if MATCH_IRE_ILL
93211681SSowmini.Varadhan@Sun.COM * or MATCH_IRE_SRC_ILL is set.
93311042SErik.Nordmark@Sun.COM */
93411681SSowmini.Varadhan@Sun.COM if ((flags & (MATCH_IRE_ILL|MATCH_IRE_SRC_ILL)) && (ill == NULL))
93511042SErik.Nordmark@Sun.COM return (NULL);
93611042SErik.Nordmark@Sun.COM
93711042SErik.Nordmark@Sun.COM rw_enter(&ipst->ips_ip6_ire_head_lock, RW_READER);
93811042SErik.Nordmark@Sun.COM ire = ire_ftable_lookup_impl_v6(addr, mask, gateway, type, ill, zoneid,
93911042SErik.Nordmark@Sun.COM tsl, flags, ipst);
94011042SErik.Nordmark@Sun.COM if (ire == NULL) {
94111042SErik.Nordmark@Sun.COM rw_exit(&ipst->ips_ip6_ire_head_lock);
94211042SErik.Nordmark@Sun.COM return (NULL);
94311042SErik.Nordmark@Sun.COM }
9440Sstevel@tonic-gate
9450Sstevel@tonic-gate /*
94611042SErik.Nordmark@Sun.COM * round-robin only if we have more than one route in the bucket.
94711042SErik.Nordmark@Sun.COM * ips_ip_ecmp_behavior controls when we do ECMP
94811042SErik.Nordmark@Sun.COM * 2: always
94911042SErik.Nordmark@Sun.COM * 1: for IRE_DEFAULT and /0 IRE_INTERFACE
95011042SErik.Nordmark@Sun.COM * 0: never
95111042SErik.Nordmark@Sun.COM *
95211042SErik.Nordmark@Sun.COM * Note: if we found an IRE_IF_CLONE we won't look at the bucket with
95311042SErik.Nordmark@Sun.COM * other ECMP IRE_INTERFACEs since the IRE_IF_CLONE is a /128 match
95411042SErik.Nordmark@Sun.COM * and the IRE_INTERFACESs are likely to be shorter matches.
9550Sstevel@tonic-gate */
95611042SErik.Nordmark@Sun.COM if (ire->ire_bucket->irb_ire_cnt > 1 && !(flags & MATCH_IRE_GW)) {
95711042SErik.Nordmark@Sun.COM if (ipst->ips_ip_ecmp_behavior == 2 ||
95811042SErik.Nordmark@Sun.COM (ipst->ips_ip_ecmp_behavior == 1 &&
95911042SErik.Nordmark@Sun.COM IS_DEFAULT_ROUTE_V6(ire))) {
96011042SErik.Nordmark@Sun.COM ire_t *next_ire;
96111042SErik.Nordmark@Sun.COM ire_ftable_args_t margs;
96211042SErik.Nordmark@Sun.COM
96311131SErik.Nordmark@Sun.COM bzero(&margs, sizeof (margs));
96411042SErik.Nordmark@Sun.COM margs.ift_addr_v6 = *addr;
96511042SErik.Nordmark@Sun.COM if (mask != NULL)
96611042SErik.Nordmark@Sun.COM margs.ift_mask_v6 = *mask;
96711042SErik.Nordmark@Sun.COM if (gateway != NULL)
96811042SErik.Nordmark@Sun.COM margs.ift_gateway_v6 = *gateway;
96911042SErik.Nordmark@Sun.COM margs.ift_type = type;
97011042SErik.Nordmark@Sun.COM margs.ift_ill = ill;
97111042SErik.Nordmark@Sun.COM margs.ift_zoneid = zoneid;
97211042SErik.Nordmark@Sun.COM margs.ift_tsl = tsl;
97311042SErik.Nordmark@Sun.COM margs.ift_flags = flags;
97411042SErik.Nordmark@Sun.COM
97511042SErik.Nordmark@Sun.COM next_ire = ire_round_robin(ire->ire_bucket, &margs,
97611042SErik.Nordmark@Sun.COM xmit_hint, ire, ipst);
97711042SErik.Nordmark@Sun.COM if (next_ire == NULL) {
97811042SErik.Nordmark@Sun.COM /* keep ire if next_ire is null */
97911042SErik.Nordmark@Sun.COM goto done;
98011042SErik.Nordmark@Sun.COM }
98111042SErik.Nordmark@Sun.COM ire_refrele(ire);
98211042SErik.Nordmark@Sun.COM ire = next_ire;
98311042SErik.Nordmark@Sun.COM }
98411042SErik.Nordmark@Sun.COM }
98511042SErik.Nordmark@Sun.COM
98611042SErik.Nordmark@Sun.COM done:
98711042SErik.Nordmark@Sun.COM /* Return generation before dropping lock */
98811042SErik.Nordmark@Sun.COM if (generationp != NULL)
98911042SErik.Nordmark@Sun.COM *generationp = ire->ire_generation;
99011042SErik.Nordmark@Sun.COM
99111042SErik.Nordmark@Sun.COM rw_exit(&ipst->ips_ip6_ire_head_lock);
99211042SErik.Nordmark@Sun.COM
9930Sstevel@tonic-gate /*
99411042SErik.Nordmark@Sun.COM * For shared-IP zones we need additional checks to what was
99511042SErik.Nordmark@Sun.COM * done in ire_match_args to make sure IRE_LOCALs are handled.
99611042SErik.Nordmark@Sun.COM *
99711042SErik.Nordmark@Sun.COM * When ip_restrict_interzone_loopback is set, then
99811042SErik.Nordmark@Sun.COM * we ensure that IRE_LOCAL are only used for loopback
99911042SErik.Nordmark@Sun.COM * between zones when the logical "Ethernet" would
100011042SErik.Nordmark@Sun.COM * have looped them back. That is, if in the absense of
100111042SErik.Nordmark@Sun.COM * the IRE_LOCAL we would have sent to packet out the
100211042SErik.Nordmark@Sun.COM * same ill.
10030Sstevel@tonic-gate */
100411042SErik.Nordmark@Sun.COM if ((ire->ire_type & IRE_LOCAL) && zoneid != ALL_ZONES &&
100511042SErik.Nordmark@Sun.COM ire->ire_zoneid != zoneid && ire->ire_zoneid != ALL_ZONES &&
100611042SErik.Nordmark@Sun.COM ipst->ips_ip_restrict_interzone_loopback) {
100711042SErik.Nordmark@Sun.COM ire = ire_alt_local(ire, zoneid, tsl, ill, generationp);
100811042SErik.Nordmark@Sun.COM ASSERT(ire != NULL);
100911042SErik.Nordmark@Sun.COM }
101011042SErik.Nordmark@Sun.COM
101111042SErik.Nordmark@Sun.COM return (ire);
101211042SErik.Nordmark@Sun.COM }
101311042SErik.Nordmark@Sun.COM
101411042SErik.Nordmark@Sun.COM /*
101511042SErik.Nordmark@Sun.COM * Look up a single ire. The caller holds either the read or write lock.
101611042SErik.Nordmark@Sun.COM */
101711042SErik.Nordmark@Sun.COM ire_t *
ire_ftable_lookup_impl_v6(const in6_addr_t * addr,const in6_addr_t * mask,const in6_addr_t * gateway,int type,const ill_t * ill,zoneid_t zoneid,const ts_label_t * tsl,int flags,ip_stack_t * ipst)101811042SErik.Nordmark@Sun.COM ire_ftable_lookup_impl_v6(const in6_addr_t *addr, const in6_addr_t *mask,
101911042SErik.Nordmark@Sun.COM const in6_addr_t *gateway, int type, const ill_t *ill,
102011042SErik.Nordmark@Sun.COM zoneid_t zoneid, const ts_label_t *tsl, int flags,
102111042SErik.Nordmark@Sun.COM ip_stack_t *ipst)
102211042SErik.Nordmark@Sun.COM {
102311042SErik.Nordmark@Sun.COM irb_t *irb_ptr;
102411042SErik.Nordmark@Sun.COM ire_t *ire = NULL;
102511042SErik.Nordmark@Sun.COM int i;
102611042SErik.Nordmark@Sun.COM
102711042SErik.Nordmark@Sun.COM ASSERT(RW_LOCK_HELD(&ipst->ips_ip6_ire_head_lock));
10280Sstevel@tonic-gate
10290Sstevel@tonic-gate /*
10300Sstevel@tonic-gate * If the mask is known, the lookup
10310Sstevel@tonic-gate * is simple, if the mask is not known
10320Sstevel@tonic-gate * we need to search.
10330Sstevel@tonic-gate */
10340Sstevel@tonic-gate if (flags & MATCH_IRE_MASK) {
10350Sstevel@tonic-gate uint_t masklen;
10360Sstevel@tonic-gate
10370Sstevel@tonic-gate masklen = ip_mask_to_plen_v6(mask);
103811042SErik.Nordmark@Sun.COM if (ipst->ips_ip_forwarding_table_v6[masklen] == NULL) {
10390Sstevel@tonic-gate return (NULL);
104011042SErik.Nordmark@Sun.COM }
10413448Sdh155122 irb_ptr = &(ipst->ips_ip_forwarding_table_v6[masklen][
10423448Sdh155122 IRE_ADDR_MASK_HASH_V6(*addr, *mask,
10434714Ssowmini ipst->ips_ip6_ftable_hash_size)]);
10440Sstevel@tonic-gate rw_enter(&irb_ptr->irb_lock, RW_READER);
10450Sstevel@tonic-gate for (ire = irb_ptr->irb_ire; ire != NULL;
10460Sstevel@tonic-gate ire = ire->ire_next) {
104711042SErik.Nordmark@Sun.COM if (IRE_IS_CONDEMNED(ire))
10480Sstevel@tonic-gate continue;
10490Sstevel@tonic-gate if (ire_match_args_v6(ire, addr, mask, gateway, type,
105011042SErik.Nordmark@Sun.COM ill, zoneid, tsl, flags))
10510Sstevel@tonic-gate goto found_ire;
10520Sstevel@tonic-gate }
10530Sstevel@tonic-gate rw_exit(&irb_ptr->irb_lock);
10540Sstevel@tonic-gate } else {
105511042SErik.Nordmark@Sun.COM uint_t masklen;
105611042SErik.Nordmark@Sun.COM
10570Sstevel@tonic-gate /*
10580Sstevel@tonic-gate * In this case we don't know the mask, we need to
10590Sstevel@tonic-gate * search the table assuming different mask sizes.
10600Sstevel@tonic-gate */
106111042SErik.Nordmark@Sun.COM if (flags & MATCH_IRE_SHORTERMASK) {
106211042SErik.Nordmark@Sun.COM masklen = ip_mask_to_plen_v6(mask);
106311042SErik.Nordmark@Sun.COM if (masklen == 0) {
106411042SErik.Nordmark@Sun.COM /* Nothing shorter than zero */
106511042SErik.Nordmark@Sun.COM return (NULL);
106611042SErik.Nordmark@Sun.COM }
106711042SErik.Nordmark@Sun.COM masklen--;
106811042SErik.Nordmark@Sun.COM } else {
106911042SErik.Nordmark@Sun.COM masklen = IP6_MASK_TABLE_SIZE - 1;
107011042SErik.Nordmark@Sun.COM }
107111042SErik.Nordmark@Sun.COM
107211042SErik.Nordmark@Sun.COM for (i = masklen; i >= 0; i--) {
10730Sstevel@tonic-gate in6_addr_t tmpmask;
10740Sstevel@tonic-gate
10753448Sdh155122 if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL)
10760Sstevel@tonic-gate continue;
10770Sstevel@tonic-gate (void) ip_plen_to_mask_v6(i, &tmpmask);
10783448Sdh155122 irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][
10790Sstevel@tonic-gate IRE_ADDR_MASK_HASH_V6(*addr, tmpmask,
10803448Sdh155122 ipst->ips_ip6_ftable_hash_size)];
10810Sstevel@tonic-gate rw_enter(&irb_ptr->irb_lock, RW_READER);
10820Sstevel@tonic-gate for (ire = irb_ptr->irb_ire; ire != NULL;
10830Sstevel@tonic-gate ire = ire->ire_next) {
108411042SErik.Nordmark@Sun.COM if (IRE_IS_CONDEMNED(ire))
10850Sstevel@tonic-gate continue;
10860Sstevel@tonic-gate if (ire_match_args_v6(ire, addr,
108711042SErik.Nordmark@Sun.COM &ire->ire_mask_v6, gateway, type, ill,
108811042SErik.Nordmark@Sun.COM zoneid, tsl, flags))
10890Sstevel@tonic-gate goto found_ire;
10900Sstevel@tonic-gate }
10910Sstevel@tonic-gate rw_exit(&irb_ptr->irb_lock);
10920Sstevel@tonic-gate }
10930Sstevel@tonic-gate }
10940Sstevel@tonic-gate ASSERT(ire == NULL);
10950Sstevel@tonic-gate ip1dbg(("ire_ftable_lookup_v6: returning NULL ire"));
10960Sstevel@tonic-gate return (NULL);
109711042SErik.Nordmark@Sun.COM
10980Sstevel@tonic-gate found_ire:
109911042SErik.Nordmark@Sun.COM ire_refhold(ire);
11000Sstevel@tonic-gate rw_exit(&irb_ptr->irb_lock);
11010Sstevel@tonic-gate return (ire);
11020Sstevel@tonic-gate }
11030Sstevel@tonic-gate
11040Sstevel@tonic-gate
11050Sstevel@tonic-gate /*
110611042SErik.Nordmark@Sun.COM * This function is called by
110711042SErik.Nordmark@Sun.COM * ip_input/ire_route_recursive when doing a route lookup on only the
110811042SErik.Nordmark@Sun.COM * destination address.
110911042SErik.Nordmark@Sun.COM *
111011042SErik.Nordmark@Sun.COM * The optimizations of this function over ire_ftable_lookup are:
111111042SErik.Nordmark@Sun.COM * o removing unnecessary flag matching
111211042SErik.Nordmark@Sun.COM * o doing longest prefix match instead of overloading it further
111311042SErik.Nordmark@Sun.COM * with the unnecessary "best_prefix_match"
111411042SErik.Nordmark@Sun.COM *
111511042SErik.Nordmark@Sun.COM * If no route is found we return IRE_NOROUTE.
11160Sstevel@tonic-gate */
111711042SErik.Nordmark@Sun.COM ire_t *
ire_ftable_lookup_simple_v6(const in6_addr_t * addr,uint32_t xmit_hint,ip_stack_t * ipst,uint_t * generationp)111811042SErik.Nordmark@Sun.COM ire_ftable_lookup_simple_v6(const in6_addr_t *addr, uint32_t xmit_hint,
111911042SErik.Nordmark@Sun.COM ip_stack_t *ipst, uint_t *generationp)
11200Sstevel@tonic-gate {
112111042SErik.Nordmark@Sun.COM ire_t *ire;
11220Sstevel@tonic-gate
112311042SErik.Nordmark@Sun.COM ire = ire_ftable_lookup_v6(addr, NULL, NULL, 0, NULL, ALL_ZONES, NULL,
112411042SErik.Nordmark@Sun.COM MATCH_IRE_DSTONLY, xmit_hint, ipst, generationp);
112511042SErik.Nordmark@Sun.COM if (ire == NULL) {
112611042SErik.Nordmark@Sun.COM ire = ire_reject(ipst, B_TRUE);
112711042SErik.Nordmark@Sun.COM if (generationp != NULL)
112811042SErik.Nordmark@Sun.COM *generationp = IRE_GENERATION_VERIFY;
11290Sstevel@tonic-gate }
113011042SErik.Nordmark@Sun.COM /* ftable_lookup did round robin */
113111042SErik.Nordmark@Sun.COM return (ire);
11320Sstevel@tonic-gate }
11330Sstevel@tonic-gate
11340Sstevel@tonic-gate ire_t *
ip_select_route_v6(const in6_addr_t * dst,const in6_addr_t src,ip_xmit_attr_t * ixa,uint_t * generationp,in6_addr_t * setsrcp,int * errorp,boolean_t * multirtp)113511681SSowmini.Varadhan@Sun.COM ip_select_route_v6(const in6_addr_t *dst, const in6_addr_t src,
113611681SSowmini.Varadhan@Sun.COM ip_xmit_attr_t *ixa, uint_t *generationp, in6_addr_t *setsrcp,
113711681SSowmini.Varadhan@Sun.COM int *errorp, boolean_t *multirtp)
11380Sstevel@tonic-gate {
113911042SErik.Nordmark@Sun.COM ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));
11400Sstevel@tonic-gate
114111681SSowmini.Varadhan@Sun.COM return (ip_select_route(dst, src, ixa, generationp, setsrcp, errorp,
114211042SErik.Nordmark@Sun.COM multirtp));
11430Sstevel@tonic-gate }
11447880SJonathan.Anderson@Sun.COM
11457880SJonathan.Anderson@Sun.COM /*
114611042SErik.Nordmark@Sun.COM * Recursively look for a route to the destination. Can also match on
114711042SErik.Nordmark@Sun.COM * the zoneid, ill, and label. Used for the data paths. See also
114811042SErik.Nordmark@Sun.COM * ire_route_recursive_dstonly.
114911042SErik.Nordmark@Sun.COM *
115011457SErik.Nordmark@Sun.COM * If IRR_ALLOCATE is not set then we will only inspect the existing IREs; never
115111042SErik.Nordmark@Sun.COM * create an IRE_IF_CLONE. This is used on the receive side when we are not
115211042SErik.Nordmark@Sun.COM * forwarding.
115311457SErik.Nordmark@Sun.COM * If IRR_INCOMPLETE is set then we return the IRE even if we can't correctly
115411457SErik.Nordmark@Sun.COM * resolve the gateway.
115511042SErik.Nordmark@Sun.COM *
115611042SErik.Nordmark@Sun.COM * Note that this function never returns NULL. It returns an IRE_NOROUTE
115711042SErik.Nordmark@Sun.COM * instead.
115811042SErik.Nordmark@Sun.COM *
115911042SErik.Nordmark@Sun.COM * If we find any IRE_LOCAL|BROADCAST etc past the first iteration it
116011042SErik.Nordmark@Sun.COM * is an error.
116111042SErik.Nordmark@Sun.COM * Allow at most one RTF_INDIRECT.
11627880SJonathan.Anderson@Sun.COM */
116311042SErik.Nordmark@Sun.COM ire_t *
ire_route_recursive_impl_v6(ire_t * ire,const in6_addr_t * nexthop,uint_t ire_type,const ill_t * ill_arg,zoneid_t zoneid,const ts_label_t * tsl,uint_t match_args,uint_t irr_flags,uint32_t xmit_hint,ip_stack_t * ipst,in6_addr_t * setsrcp,tsol_ire_gw_secattr_t ** gwattrp,uint_t * generationp)116411042SErik.Nordmark@Sun.COM ire_route_recursive_impl_v6(ire_t *ire,
116511042SErik.Nordmark@Sun.COM const in6_addr_t *nexthop, uint_t ire_type, const ill_t *ill_arg,
116611042SErik.Nordmark@Sun.COM zoneid_t zoneid, const ts_label_t *tsl, uint_t match_args,
116711457SErik.Nordmark@Sun.COM uint_t irr_flags, uint32_t xmit_hint, ip_stack_t *ipst,
116811042SErik.Nordmark@Sun.COM in6_addr_t *setsrcp, tsol_ire_gw_secattr_t **gwattrp, uint_t *generationp)
11697880SJonathan.Anderson@Sun.COM {
117011042SErik.Nordmark@Sun.COM int i, j;
117111042SErik.Nordmark@Sun.COM in6_addr_t v6nexthop = *nexthop;
117211042SErik.Nordmark@Sun.COM ire_t *ires[MAX_IRE_RECURSION];
117311042SErik.Nordmark@Sun.COM uint_t generation;
117411042SErik.Nordmark@Sun.COM uint_t generations[MAX_IRE_RECURSION];
117511042SErik.Nordmark@Sun.COM boolean_t need_refrele = B_FALSE;
117611042SErik.Nordmark@Sun.COM boolean_t invalidate = B_FALSE;
117711042SErik.Nordmark@Sun.COM ill_t *ill = NULL;
117812038SSowmini.Varadhan@Sun.COM uint_t maskoff = (IRE_LOCAL|IRE_LOOPBACK);
117911042SErik.Nordmark@Sun.COM
118011042SErik.Nordmark@Sun.COM if (setsrcp != NULL)
118111042SErik.Nordmark@Sun.COM ASSERT(IN6_IS_ADDR_UNSPECIFIED(setsrcp));
118211042SErik.Nordmark@Sun.COM if (gwattrp != NULL)
118311042SErik.Nordmark@Sun.COM ASSERT(*gwattrp == NULL);
118411042SErik.Nordmark@Sun.COM
118511042SErik.Nordmark@Sun.COM /*
118611042SErik.Nordmark@Sun.COM * We iterate up to three times to resolve a route, even though
118711042SErik.Nordmark@Sun.COM * we have four slots in the array. The extra slot is for an
118811042SErik.Nordmark@Sun.COM * IRE_IF_CLONE we might need to create.
118911042SErik.Nordmark@Sun.COM */
119011042SErik.Nordmark@Sun.COM i = 0;
119111042SErik.Nordmark@Sun.COM while (i < MAX_IRE_RECURSION - 1) {
119211042SErik.Nordmark@Sun.COM /* ire_ftable_lookup handles round-robin/ECMP */
119311042SErik.Nordmark@Sun.COM if (ire == NULL) {
119411042SErik.Nordmark@Sun.COM ire = ire_ftable_lookup_v6(&v6nexthop, 0, 0, ire_type,
119511681SSowmini.Varadhan@Sun.COM (ill != NULL ? ill : ill_arg), zoneid, tsl,
119611042SErik.Nordmark@Sun.COM match_args, xmit_hint, ipst, &generation);
119711042SErik.Nordmark@Sun.COM } else {
119811042SErik.Nordmark@Sun.COM /* Caller passed it; extra hold since we will rele */
119911042SErik.Nordmark@Sun.COM ire_refhold(ire);
120011042SErik.Nordmark@Sun.COM if (generationp != NULL)
120111042SErik.Nordmark@Sun.COM generation = *generationp;
120211042SErik.Nordmark@Sun.COM else
120311042SErik.Nordmark@Sun.COM generation = IRE_GENERATION_VERIFY;
120411042SErik.Nordmark@Sun.COM }
120511042SErik.Nordmark@Sun.COM
120612038SSowmini.Varadhan@Sun.COM if (ire == NULL) {
120712038SSowmini.Varadhan@Sun.COM if (i > 0 && (irr_flags & IRR_INCOMPLETE)) {
120812038SSowmini.Varadhan@Sun.COM ire = ires[0];
120912038SSowmini.Varadhan@Sun.COM ire_refhold(ire);
121012038SSowmini.Varadhan@Sun.COM } else {
121112038SSowmini.Varadhan@Sun.COM ire = ire_reject(ipst, B_TRUE);
121212038SSowmini.Varadhan@Sun.COM }
121312038SSowmini.Varadhan@Sun.COM goto error;
121412038SSowmini.Varadhan@Sun.COM }
121511042SErik.Nordmark@Sun.COM
121611042SErik.Nordmark@Sun.COM /* Need to return the ire with RTF_REJECT|BLACKHOLE */
121711042SErik.Nordmark@Sun.COM if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))
121811042SErik.Nordmark@Sun.COM goto error;
121911042SErik.Nordmark@Sun.COM
122011042SErik.Nordmark@Sun.COM ASSERT(!(ire->ire_type & IRE_MULTICAST)); /* Not in ftable */
1221*12985SSowmini.Varadhan@oracle.COM /*
1222*12985SSowmini.Varadhan@oracle.COM * Verify that the IRE_IF_CLONE has a consistent generation
1223*12985SSowmini.Varadhan@oracle.COM * number.
1224*12985SSowmini.Varadhan@oracle.COM */
1225*12985SSowmini.Varadhan@oracle.COM if ((ire->ire_type & IRE_IF_CLONE) && !ire_clone_verify(ire)) {
1226*12985SSowmini.Varadhan@oracle.COM ire_refrele(ire);
1227*12985SSowmini.Varadhan@oracle.COM ire = NULL;
1228*12985SSowmini.Varadhan@oracle.COM continue;
1229*12985SSowmini.Varadhan@oracle.COM }
123011042SErik.Nordmark@Sun.COM
123112038SSowmini.Varadhan@Sun.COM /*
123212038SSowmini.Varadhan@Sun.COM * Don't allow anything unusual past the first iteration.
123312038SSowmini.Varadhan@Sun.COM * After the first lookup, we should no longer look for
123412038SSowmini.Varadhan@Sun.COM * (IRE_LOCAL|IRE_LOOPBACK) or RTF_INDIRECT routes.
123512038SSowmini.Varadhan@Sun.COM *
123612038SSowmini.Varadhan@Sun.COM * In addition, after we have found a direct IRE_OFFLINK,
123712038SSowmini.Varadhan@Sun.COM * we should only look for interface or clone routes.
123812038SSowmini.Varadhan@Sun.COM */
123912038SSowmini.Varadhan@Sun.COM match_args |= MATCH_IRE_DIRECT; /* no more RTF_INDIRECTs */
124012038SSowmini.Varadhan@Sun.COM if ((ire->ire_type & IRE_OFFLINK) &&
124112038SSowmini.Varadhan@Sun.COM !(ire->ire_flags & RTF_INDIRECT)) {
124212038SSowmini.Varadhan@Sun.COM ire_type = IRE_IF_ALL;
124312038SSowmini.Varadhan@Sun.COM } else {
124412038SSowmini.Varadhan@Sun.COM if (!(match_args & MATCH_IRE_TYPE))
124512038SSowmini.Varadhan@Sun.COM ire_type = (IRE_OFFLINK|IRE_ONLINK);
124612038SSowmini.Varadhan@Sun.COM ire_type &= ~maskoff; /* no more LOCAL, LOOPBACK */
124711042SErik.Nordmark@Sun.COM }
124812038SSowmini.Varadhan@Sun.COM match_args |= MATCH_IRE_TYPE;
124911042SErik.Nordmark@Sun.COM /* We have a usable IRE */
125011042SErik.Nordmark@Sun.COM ires[i] = ire;
125111042SErik.Nordmark@Sun.COM generations[i] = generation;
125211042SErik.Nordmark@Sun.COM i++;
125311042SErik.Nordmark@Sun.COM
125411042SErik.Nordmark@Sun.COM /* The first RTF_SETSRC address is passed back if setsrcp */
125511042SErik.Nordmark@Sun.COM if ((ire->ire_flags & RTF_SETSRC) &&
125611042SErik.Nordmark@Sun.COM setsrcp != NULL && IN6_IS_ADDR_UNSPECIFIED(setsrcp)) {
125711042SErik.Nordmark@Sun.COM ASSERT(!IN6_IS_ADDR_UNSPECIFIED(
125811042SErik.Nordmark@Sun.COM &ire->ire_setsrc_addr_v6));
125911042SErik.Nordmark@Sun.COM *setsrcp = ire->ire_setsrc_addr_v6;
126011042SErik.Nordmark@Sun.COM }
126111042SErik.Nordmark@Sun.COM
126211042SErik.Nordmark@Sun.COM /* The first ire_gw_secattr is passed back if gwattrp */
126311042SErik.Nordmark@Sun.COM if (ire->ire_gw_secattr != NULL &&
126411042SErik.Nordmark@Sun.COM gwattrp != NULL && *gwattrp == NULL)
126511042SErik.Nordmark@Sun.COM *gwattrp = ire->ire_gw_secattr;
126611042SErik.Nordmark@Sun.COM
126711042SErik.Nordmark@Sun.COM /*
126811042SErik.Nordmark@Sun.COM * Check if we have a short-cut pointer to an IRE for this
126911042SErik.Nordmark@Sun.COM * destination, and that the cached dependency isn't stale.
127011042SErik.Nordmark@Sun.COM * In that case we've rejoined an existing tree towards a
127111042SErik.Nordmark@Sun.COM * parent, thus we don't need to continue the loop to
127211042SErik.Nordmark@Sun.COM * discover the rest of the tree.
127311042SErik.Nordmark@Sun.COM */
127411042SErik.Nordmark@Sun.COM mutex_enter(&ire->ire_lock);
127511042SErik.Nordmark@Sun.COM if (ire->ire_dep_parent != NULL &&
127611042SErik.Nordmark@Sun.COM ire->ire_dep_parent->ire_generation ==
127711042SErik.Nordmark@Sun.COM ire->ire_dep_parent_generation) {
127811042SErik.Nordmark@Sun.COM mutex_exit(&ire->ire_lock);
127911042SErik.Nordmark@Sun.COM ire = NULL;
128011042SErik.Nordmark@Sun.COM goto done;
128111042SErik.Nordmark@Sun.COM }
128211042SErik.Nordmark@Sun.COM mutex_exit(&ire->ire_lock);
128311042SErik.Nordmark@Sun.COM
128411042SErik.Nordmark@Sun.COM /*
128511042SErik.Nordmark@Sun.COM * If this type should have an ire_nce_cache (even if it
128611042SErik.Nordmark@Sun.COM * doesn't yet have one) then we are done. Includes
128711042SErik.Nordmark@Sun.COM * IRE_INTERFACE with a full 128 bit mask.
128811042SErik.Nordmark@Sun.COM */
128911042SErik.Nordmark@Sun.COM if (ire->ire_nce_capable) {
129011042SErik.Nordmark@Sun.COM ire = NULL;
129111042SErik.Nordmark@Sun.COM goto done;
129211042SErik.Nordmark@Sun.COM }
129311042SErik.Nordmark@Sun.COM ASSERT(!(ire->ire_type & IRE_IF_CLONE));
129411042SErik.Nordmark@Sun.COM /*
129511042SErik.Nordmark@Sun.COM * For an IRE_INTERFACE we create an IRE_IF_CLONE for this
129611042SErik.Nordmark@Sun.COM * particular destination
129711042SErik.Nordmark@Sun.COM */
129811042SErik.Nordmark@Sun.COM if (ire->ire_type & IRE_INTERFACE) {
129911042SErik.Nordmark@Sun.COM ire_t *clone;
130011042SErik.Nordmark@Sun.COM
130111042SErik.Nordmark@Sun.COM ASSERT(ire->ire_masklen != IPV6_ABITS);
130211042SErik.Nordmark@Sun.COM
130311042SErik.Nordmark@Sun.COM /*
130411042SErik.Nordmark@Sun.COM * In the case of ip_input and ILLF_FORWARDING not
130511457SErik.Nordmark@Sun.COM * being set, and in the case of RTM_GET, there is
130611457SErik.Nordmark@Sun.COM * no point in allocating an IRE_IF_CLONE. We return
130711457SErik.Nordmark@Sun.COM * the IRE_INTERFACE. Note that !IRR_ALLOCATE can
130811457SErik.Nordmark@Sun.COM * result in a ire_dep_parent which is IRE_IF_*
130911457SErik.Nordmark@Sun.COM * without an IRE_IF_CLONE.
131011042SErik.Nordmark@Sun.COM * We recover from that when we need to send packets
131111042SErik.Nordmark@Sun.COM * by ensuring that the generations become
131211042SErik.Nordmark@Sun.COM * IRE_GENERATION_VERIFY in this case.
131311042SErik.Nordmark@Sun.COM */
131411457SErik.Nordmark@Sun.COM if (!(irr_flags & IRR_ALLOCATE)) {
131511042SErik.Nordmark@Sun.COM invalidate = B_TRUE;
131611042SErik.Nordmark@Sun.COM ire = NULL;
131711042SErik.Nordmark@Sun.COM goto done;
131811042SErik.Nordmark@Sun.COM }
131911042SErik.Nordmark@Sun.COM
132011042SErik.Nordmark@Sun.COM clone = ire_create_if_clone(ire, &v6nexthop,
132111042SErik.Nordmark@Sun.COM &generation);
132211042SErik.Nordmark@Sun.COM if (clone == NULL) {
132311042SErik.Nordmark@Sun.COM /*
132411042SErik.Nordmark@Sun.COM * Temporary failure - no memory.
132511042SErik.Nordmark@Sun.COM * Don't want caller to cache IRE_NOROUTE.
132611042SErik.Nordmark@Sun.COM */
132711042SErik.Nordmark@Sun.COM invalidate = B_TRUE;
132811042SErik.Nordmark@Sun.COM ire = ire_blackhole(ipst, B_TRUE);
132911042SErik.Nordmark@Sun.COM goto error;
133011042SErik.Nordmark@Sun.COM }
133111042SErik.Nordmark@Sun.COM /*
133211042SErik.Nordmark@Sun.COM * Make clone next to last entry and the
133311042SErik.Nordmark@Sun.COM * IRE_INTERFACE the last in the dependency
133411042SErik.Nordmark@Sun.COM * chain since the clone depends on the
133511042SErik.Nordmark@Sun.COM * IRE_INTERFACE.
133611042SErik.Nordmark@Sun.COM */
133711042SErik.Nordmark@Sun.COM ASSERT(i >= 1);
133811042SErik.Nordmark@Sun.COM ASSERT(i < MAX_IRE_RECURSION);
133911042SErik.Nordmark@Sun.COM
134011042SErik.Nordmark@Sun.COM ires[i] = ires[i-1];
134111042SErik.Nordmark@Sun.COM generations[i] = generations[i-1];
134211042SErik.Nordmark@Sun.COM ires[i-1] = clone;
134311042SErik.Nordmark@Sun.COM generations[i-1] = generation;
134411042SErik.Nordmark@Sun.COM i++;
134511042SErik.Nordmark@Sun.COM
134611042SErik.Nordmark@Sun.COM ire = NULL;
134711042SErik.Nordmark@Sun.COM goto done;
134811042SErik.Nordmark@Sun.COM }
134911042SErik.Nordmark@Sun.COM
135011042SErik.Nordmark@Sun.COM /*
135111042SErik.Nordmark@Sun.COM * We only match on the type and optionally ILL when
135211042SErik.Nordmark@Sun.COM * recursing. The type match is used by some callers
135311042SErik.Nordmark@Sun.COM * to exclude certain types (such as IRE_IF_CLONE or
135411042SErik.Nordmark@Sun.COM * IRE_LOCAL|IRE_LOOPBACK).
135511681SSowmini.Varadhan@Sun.COM *
135611681SSowmini.Varadhan@Sun.COM * In the MATCH_IRE_SRC_ILL case, ill_arg may be the 'srcof'
135711681SSowmini.Varadhan@Sun.COM * ire->ire_ill, and we want to find the IRE_INTERFACE for
135811681SSowmini.Varadhan@Sun.COM * ire_ill, so we set ill to the ire_ill
135911042SErik.Nordmark@Sun.COM */
136012038SSowmini.Varadhan@Sun.COM match_args &= (MATCH_IRE_TYPE | MATCH_IRE_DIRECT);
136111042SErik.Nordmark@Sun.COM v6nexthop = ire->ire_gateway_addr_v6;
136211042SErik.Nordmark@Sun.COM if (ill == NULL && ire->ire_ill != NULL) {
136311042SErik.Nordmark@Sun.COM ill = ire->ire_ill;
136411042SErik.Nordmark@Sun.COM need_refrele = B_TRUE;
136511042SErik.Nordmark@Sun.COM ill_refhold(ill);
136611042SErik.Nordmark@Sun.COM match_args |= MATCH_IRE_ILL;
136711042SErik.Nordmark@Sun.COM }
136811042SErik.Nordmark@Sun.COM ire = NULL;
136911042SErik.Nordmark@Sun.COM }
137011042SErik.Nordmark@Sun.COM ASSERT(ire == NULL);
137111042SErik.Nordmark@Sun.COM ire = ire_reject(ipst, B_TRUE);
137211042SErik.Nordmark@Sun.COM
137311042SErik.Nordmark@Sun.COM error:
137411042SErik.Nordmark@Sun.COM ASSERT(ire != NULL);
137511042SErik.Nordmark@Sun.COM if (need_refrele)
137611042SErik.Nordmark@Sun.COM ill_refrele(ill);
137711042SErik.Nordmark@Sun.COM
137811042SErik.Nordmark@Sun.COM /*
137911042SErik.Nordmark@Sun.COM * In the case of MULTIRT we want to try a different IRE the next
138011042SErik.Nordmark@Sun.COM * time. We let the next packet retry in that case.
138111042SErik.Nordmark@Sun.COM */
138211042SErik.Nordmark@Sun.COM if (i > 0 && (ires[0]->ire_flags & RTF_MULTIRT))
138311042SErik.Nordmark@Sun.COM (void) ire_no_good(ires[0]);
138411042SErik.Nordmark@Sun.COM
138511042SErik.Nordmark@Sun.COM cleanup:
138611042SErik.Nordmark@Sun.COM /* cleanup ires[i] */
138711042SErik.Nordmark@Sun.COM ire_dep_unbuild(ires, i);
138811042SErik.Nordmark@Sun.COM for (j = 0; j < i; j++)
138911042SErik.Nordmark@Sun.COM ire_refrele(ires[j]);
139011042SErik.Nordmark@Sun.COM
139111457SErik.Nordmark@Sun.COM ASSERT((ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
139211457SErik.Nordmark@Sun.COM (irr_flags & IRR_INCOMPLETE));
139311042SErik.Nordmark@Sun.COM /*
139411042SErik.Nordmark@Sun.COM * Use IRE_GENERATION_VERIFY to ensure that ip_output will redo the
139511042SErik.Nordmark@Sun.COM * ip_select_route since the reject or lack of memory might be gone.
139611042SErik.Nordmark@Sun.COM */
139711042SErik.Nordmark@Sun.COM if (generationp != NULL)
139811042SErik.Nordmark@Sun.COM *generationp = IRE_GENERATION_VERIFY;
139911042SErik.Nordmark@Sun.COM return (ire);
140011042SErik.Nordmark@Sun.COM
140111042SErik.Nordmark@Sun.COM done:
140211042SErik.Nordmark@Sun.COM ASSERT(ire == NULL);
140311042SErik.Nordmark@Sun.COM if (need_refrele)
140411042SErik.Nordmark@Sun.COM ill_refrele(ill);
140511042SErik.Nordmark@Sun.COM
140611042SErik.Nordmark@Sun.COM /* Build dependencies */
140711131SErik.Nordmark@Sun.COM if (i > 1 && !ire_dep_build(ires, generations, i)) {
140811042SErik.Nordmark@Sun.COM /* Something in chain was condemned; tear it apart */
140911042SErik.Nordmark@Sun.COM ire = ire_blackhole(ipst, B_TRUE);
141011042SErik.Nordmark@Sun.COM goto cleanup;
14117880SJonathan.Anderson@Sun.COM }
14127880SJonathan.Anderson@Sun.COM
141311042SErik.Nordmark@Sun.COM /*
141411042SErik.Nordmark@Sun.COM * Release all refholds except the one for ires[0] that we
141511042SErik.Nordmark@Sun.COM * will return to the caller.
141611042SErik.Nordmark@Sun.COM */
141711042SErik.Nordmark@Sun.COM for (j = 1; j < i; j++)
141811042SErik.Nordmark@Sun.COM ire_refrele(ires[j]);
141911042SErik.Nordmark@Sun.COM
142011042SErik.Nordmark@Sun.COM if (invalidate) {
142111042SErik.Nordmark@Sun.COM /*
142211042SErik.Nordmark@Sun.COM * Since we needed to allocate but couldn't we need to make
142311042SErik.Nordmark@Sun.COM * sure that the dependency chain is rebuilt the next time.
142411042SErik.Nordmark@Sun.COM */
142511042SErik.Nordmark@Sun.COM ire_dep_invalidate_generations(ires[0]);
142611042SErik.Nordmark@Sun.COM generation = IRE_GENERATION_VERIFY;
142711042SErik.Nordmark@Sun.COM } else {
142811042SErik.Nordmark@Sun.COM /*
142911042SErik.Nordmark@Sun.COM * IREs can have been added or deleted while we did the
143011042SErik.Nordmark@Sun.COM * recursive lookup and we can't catch those until we've built
143111042SErik.Nordmark@Sun.COM * the dependencies. We verify the stored
143211042SErik.Nordmark@Sun.COM * ire_dep_parent_generation to catch any such changes and
143311042SErik.Nordmark@Sun.COM * return IRE_GENERATION_VERIFY (which will cause
143411042SErik.Nordmark@Sun.COM * ip_select_route to be called again so we can redo the
143511042SErik.Nordmark@Sun.COM * recursive lookup next time we send a packet.
143611042SErik.Nordmark@Sun.COM */
143711131SErik.Nordmark@Sun.COM if (ires[0]->ire_dep_parent == NULL)
143811131SErik.Nordmark@Sun.COM generation = ires[0]->ire_generation;
143911131SErik.Nordmark@Sun.COM else
144011131SErik.Nordmark@Sun.COM generation = ire_dep_validate_generations(ires[0]);
144111042SErik.Nordmark@Sun.COM if (generations[0] != ires[0]->ire_generation) {
144211042SErik.Nordmark@Sun.COM /* Something changed at the top */
144311042SErik.Nordmark@Sun.COM generation = IRE_GENERATION_VERIFY;
14447880SJonathan.Anderson@Sun.COM }
14457880SJonathan.Anderson@Sun.COM }
144611042SErik.Nordmark@Sun.COM if (generationp != NULL)
144711042SErik.Nordmark@Sun.COM *generationp = generation;
14487880SJonathan.Anderson@Sun.COM
144911042SErik.Nordmark@Sun.COM return (ires[0]);
145011042SErik.Nordmark@Sun.COM }
145111042SErik.Nordmark@Sun.COM
145211042SErik.Nordmark@Sun.COM ire_t *
ire_route_recursive_v6(const in6_addr_t * nexthop,uint_t ire_type,const ill_t * ill,zoneid_t zoneid,const ts_label_t * tsl,uint_t match_args,uint_t irr_flags,uint32_t xmit_hint,ip_stack_t * ipst,in6_addr_t * setsrcp,tsol_ire_gw_secattr_t ** gwattrp,uint_t * generationp)145311042SErik.Nordmark@Sun.COM ire_route_recursive_v6(const in6_addr_t *nexthop, uint_t ire_type,
145411042SErik.Nordmark@Sun.COM const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, uint_t match_args,
145511457SErik.Nordmark@Sun.COM uint_t irr_flags, uint32_t xmit_hint, ip_stack_t *ipst,
145611042SErik.Nordmark@Sun.COM in6_addr_t *setsrcp, tsol_ire_gw_secattr_t **gwattrp, uint_t *generationp)
145711042SErik.Nordmark@Sun.COM {
145811042SErik.Nordmark@Sun.COM return (ire_route_recursive_impl_v6(NULL, nexthop, ire_type, ill,
145911457SErik.Nordmark@Sun.COM zoneid, tsl, match_args, irr_flags, xmit_hint, ipst, setsrcp,
146011042SErik.Nordmark@Sun.COM gwattrp, generationp));
14617880SJonathan.Anderson@Sun.COM }
146211042SErik.Nordmark@Sun.COM
146311042SErik.Nordmark@Sun.COM /*
146411042SErik.Nordmark@Sun.COM * Recursively look for a route to the destination.
146511042SErik.Nordmark@Sun.COM * We only handle a destination match here, yet we have the same arguments
146611042SErik.Nordmark@Sun.COM * as the full match to allow function pointers to select between the two.
146711042SErik.Nordmark@Sun.COM *
146811042SErik.Nordmark@Sun.COM * Note that this function never returns NULL. It returns an IRE_NOROUTE
146911042SErik.Nordmark@Sun.COM * instead.
147011042SErik.Nordmark@Sun.COM *
147111042SErik.Nordmark@Sun.COM * If we find any IRE_LOCAL|BROADCAST etc past the first iteration it
147211042SErik.Nordmark@Sun.COM * is an error.
147311042SErik.Nordmark@Sun.COM * Allow at most one RTF_INDIRECT.
147411042SErik.Nordmark@Sun.COM */
147511042SErik.Nordmark@Sun.COM ire_t *
ire_route_recursive_dstonly_v6(const in6_addr_t * nexthop,uint_t irr_flags,uint32_t xmit_hint,ip_stack_t * ipst)147611457SErik.Nordmark@Sun.COM ire_route_recursive_dstonly_v6(const in6_addr_t *nexthop, uint_t irr_flags,
147711042SErik.Nordmark@Sun.COM uint32_t xmit_hint, ip_stack_t *ipst)
147811042SErik.Nordmark@Sun.COM {
147911042SErik.Nordmark@Sun.COM ire_t *ire;
148011042SErik.Nordmark@Sun.COM ire_t *ire1;
148111042SErik.Nordmark@Sun.COM uint_t generation;
148211042SErik.Nordmark@Sun.COM
148311042SErik.Nordmark@Sun.COM /* ire_ftable_lookup handles round-robin/ECMP */
148411042SErik.Nordmark@Sun.COM ire = ire_ftable_lookup_simple_v6(nexthop, xmit_hint, ipst,
148511042SErik.Nordmark@Sun.COM &generation);
148611042SErik.Nordmark@Sun.COM ASSERT(ire != NULL);
148711042SErik.Nordmark@Sun.COM
148811042SErik.Nordmark@Sun.COM /*
148911042SErik.Nordmark@Sun.COM * If the IRE has a current cached parent we know that the whole
149011042SErik.Nordmark@Sun.COM * parent chain is current, hence we don't need to discover and
149111042SErik.Nordmark@Sun.COM * build any dependencies by doing a recursive lookup.
149211042SErik.Nordmark@Sun.COM */
149311042SErik.Nordmark@Sun.COM mutex_enter(&ire->ire_lock);
1494*12985SSowmini.Varadhan@oracle.COM if (ire->ire_dep_parent != NULL) {
1495*12985SSowmini.Varadhan@oracle.COM if (ire->ire_dep_parent->ire_generation ==
1496*12985SSowmini.Varadhan@oracle.COM ire->ire_dep_parent_generation) {
1497*12985SSowmini.Varadhan@oracle.COM mutex_exit(&ire->ire_lock);
1498*12985SSowmini.Varadhan@oracle.COM return (ire);
1499*12985SSowmini.Varadhan@oracle.COM }
1500*12985SSowmini.Varadhan@oracle.COM mutex_exit(&ire->ire_lock);
1501*12985SSowmini.Varadhan@oracle.COM } else {
150211042SErik.Nordmark@Sun.COM mutex_exit(&ire->ire_lock);
1503*12985SSowmini.Varadhan@oracle.COM /*
1504*12985SSowmini.Varadhan@oracle.COM * If this type should have an ire_nce_cache (even if it
1505*12985SSowmini.Varadhan@oracle.COM * doesn't yet have one) then we are done. Includes
1506*12985SSowmini.Varadhan@oracle.COM * IRE_INTERFACE with a full 128 bit mask.
1507*12985SSowmini.Varadhan@oracle.COM */
1508*12985SSowmini.Varadhan@oracle.COM if (ire->ire_nce_capable)
1509*12985SSowmini.Varadhan@oracle.COM return (ire);
151011042SErik.Nordmark@Sun.COM }
151111042SErik.Nordmark@Sun.COM
151211042SErik.Nordmark@Sun.COM /*
151311042SErik.Nordmark@Sun.COM * Fallback to loop in the normal code starting with the ire
151411042SErik.Nordmark@Sun.COM * we found. Normally this would return the same ire.
151511042SErik.Nordmark@Sun.COM */
151611042SErik.Nordmark@Sun.COM ire1 = ire_route_recursive_impl_v6(ire, nexthop, 0, NULL, ALL_ZONES,
151711457SErik.Nordmark@Sun.COM NULL, MATCH_IRE_DSTONLY, irr_flags, xmit_hint, ipst, NULL, NULL,
151811042SErik.Nordmark@Sun.COM &generation);
151911042SErik.Nordmark@Sun.COM ire_refrele(ire);
152011042SErik.Nordmark@Sun.COM return (ire1);
152111042SErik.Nordmark@Sun.COM }
1522