10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 51676Sjpk * Common Development and Distribution License (the "License"). 61676Sjpk * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 225940Ssowmini * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate /* 260Sstevel@tonic-gate * Copyright (c) 1990 Mentat Inc. 270Sstevel@tonic-gate */ 280Sstevel@tonic-gate 290Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 300Sstevel@tonic-gate 310Sstevel@tonic-gate /* 320Sstevel@tonic-gate * This file contains routines that manipulate Internet Routing Entries (IREs). 330Sstevel@tonic-gate */ 340Sstevel@tonic-gate #include <sys/types.h> 350Sstevel@tonic-gate #include <sys/stream.h> 360Sstevel@tonic-gate #include <sys/stropts.h> 370Sstevel@tonic-gate #include <sys/ddi.h> 380Sstevel@tonic-gate #include <sys/cmn_err.h> 390Sstevel@tonic-gate 400Sstevel@tonic-gate #include <sys/systm.h> 410Sstevel@tonic-gate #include <sys/param.h> 420Sstevel@tonic-gate #include <sys/socket.h> 430Sstevel@tonic-gate #include <net/if.h> 440Sstevel@tonic-gate #include <net/route.h> 450Sstevel@tonic-gate #include <netinet/in.h> 460Sstevel@tonic-gate #include <net/if_dl.h> 470Sstevel@tonic-gate #include <netinet/ip6.h> 480Sstevel@tonic-gate #include <netinet/icmp6.h> 490Sstevel@tonic-gate 500Sstevel@tonic-gate #include <inet/common.h> 510Sstevel@tonic-gate #include <inet/mi.h> 520Sstevel@tonic-gate #include <inet/ip.h> 530Sstevel@tonic-gate #include <inet/ip6.h> 540Sstevel@tonic-gate #include <inet/ip_ndp.h> 550Sstevel@tonic-gate #include <inet/ip_if.h> 560Sstevel@tonic-gate #include <inet/ip_ire.h> 570Sstevel@tonic-gate #include <inet/ipclassifier.h> 580Sstevel@tonic-gate #include <inet/nd.h> 590Sstevel@tonic-gate #include <sys/kmem.h> 600Sstevel@tonic-gate #include <sys/zone.h> 610Sstevel@tonic-gate 621676Sjpk #include <sys/tsol/label.h> 631676Sjpk #include <sys/tsol/tnet.h> 641676Sjpk 650Sstevel@tonic-gate static ire_t ire_null; 660Sstevel@tonic-gate 670Sstevel@tonic-gate static ire_t *ire_ihandle_lookup_onlink_v6(ire_t *cire); 680Sstevel@tonic-gate static boolean_t ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, 691676Sjpk const in6_addr_t *mask, const in6_addr_t *gateway, int type, 701676Sjpk const ipif_t *ipif, zoneid_t zoneid, uint32_t ihandle, 711676Sjpk const ts_label_t *tsl, int match_flags); 724714Ssowmini static ire_t *ire_init_v6(ire_t *, const in6_addr_t *, const in6_addr_t *, 734714Ssowmini const in6_addr_t *, const in6_addr_t *, uint_t *, queue_t *, queue_t *, 744714Ssowmini ushort_t, ipif_t *, const in6_addr_t *, uint32_t, uint32_t, uint_t, 754714Ssowmini const iulp_t *, tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *); 760Sstevel@tonic-gate 770Sstevel@tonic-gate 780Sstevel@tonic-gate /* 790Sstevel@tonic-gate * Initialize the ire that is specific to IPv6 part and call 800Sstevel@tonic-gate * ire_init_common to finish it. 810Sstevel@tonic-gate */ 824714Ssowmini static ire_t * 834714Ssowmini ire_init_v6(ire_t *ire, const in6_addr_t *v6addr, const in6_addr_t *v6mask, 844714Ssowmini const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 854714Ssowmini uint_t *max_fragp, queue_t *rfq, queue_t *stq, ushort_t type, 864714Ssowmini ipif_t *ipif, const in6_addr_t *v6cmask, uint32_t phandle, 874714Ssowmini uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, tsol_gc_t *gc, 884714Ssowmini tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) 890Sstevel@tonic-gate { 902535Ssangeeta 911676Sjpk /* 921676Sjpk * Reject IRE security attribute creation/initialization 931676Sjpk * if system is not running in Trusted mode. 941676Sjpk */ 951676Sjpk if ((gc != NULL || gcgrp != NULL) && !is_system_labeled()) 961676Sjpk return (NULL); 971676Sjpk 980Sstevel@tonic-gate 993448Sdh155122 BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_alloced); 1000Sstevel@tonic-gate ire->ire_addr_v6 = *v6addr; 1010Sstevel@tonic-gate 1020Sstevel@tonic-gate if (v6src_addr != NULL) 1030Sstevel@tonic-gate ire->ire_src_addr_v6 = *v6src_addr; 1040Sstevel@tonic-gate if (v6mask != NULL) { 1050Sstevel@tonic-gate ire->ire_mask_v6 = *v6mask; 1060Sstevel@tonic-gate ire->ire_masklen = ip_mask_to_plen_v6(&ire->ire_mask_v6); 1070Sstevel@tonic-gate } 1080Sstevel@tonic-gate if (v6gateway != NULL) 1090Sstevel@tonic-gate ire->ire_gateway_addr_v6 = *v6gateway; 1100Sstevel@tonic-gate 1110Sstevel@tonic-gate if (type == IRE_CACHE && v6cmask != NULL) 1120Sstevel@tonic-gate ire->ire_cmask_v6 = *v6cmask; 1130Sstevel@tonic-gate 1140Sstevel@tonic-gate /* 1150Sstevel@tonic-gate * Multirouted packets need to have a fragment header added so that 1160Sstevel@tonic-gate * the receiver is able to discard duplicates according to their 1170Sstevel@tonic-gate * fragment identifier. 1180Sstevel@tonic-gate */ 1190Sstevel@tonic-gate if (type == IRE_CACHE && (flags & RTF_MULTIRT)) { 1200Sstevel@tonic-gate ire->ire_frag_flag = IPH_FRAG_HDR; 1210Sstevel@tonic-gate } 1220Sstevel@tonic-gate 1231676Sjpk /* ire_init_common will free the mblks upon encountering any failure */ 1244823Sseb if (!ire_init_common(ire, max_fragp, NULL, rfq, stq, type, ipif, 1254823Sseb phandle, ihandle, flags, IPV6_VERSION, ulp_info, gc, gcgrp, ipst)) 1261676Sjpk return (NULL); 1270Sstevel@tonic-gate 1280Sstevel@tonic-gate return (ire); 1290Sstevel@tonic-gate } 1300Sstevel@tonic-gate 1310Sstevel@tonic-gate /* 1320Sstevel@tonic-gate * Similar to ire_create_v6 except that it is called only when 1330Sstevel@tonic-gate * we want to allocate ire as an mblk e.g. we have a external 1340Sstevel@tonic-gate * resolver. Do we need this in IPv6 ? 1354714Ssowmini * 1364714Ssowmini * IPv6 initializes the ire_nce in ire_add_v6, which expects to 1374714Ssowmini * find the ire_nce to be null when it is called. So, although 1384714Ssowmini * we have a src_nce parameter (in the interest of matching up with 1394714Ssowmini * the argument list of the v4 version), we ignore the src_nce 1404714Ssowmini * argument here. 1410Sstevel@tonic-gate */ 1424714Ssowmini /* ARGSUSED */ 1430Sstevel@tonic-gate ire_t * 1440Sstevel@tonic-gate ire_create_mp_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, 1450Sstevel@tonic-gate const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 1464714Ssowmini nce_t *src_nce, queue_t *rfq, queue_t *stq, ushort_t type, 1474714Ssowmini ipif_t *ipif, const in6_addr_t *v6cmask, 1481676Sjpk uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, 1493448Sdh155122 tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) 1500Sstevel@tonic-gate { 1510Sstevel@tonic-gate ire_t *ire; 1520Sstevel@tonic-gate ire_t *ret_ire; 1530Sstevel@tonic-gate mblk_t *mp; 1540Sstevel@tonic-gate 1550Sstevel@tonic-gate ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr)); 1560Sstevel@tonic-gate 1570Sstevel@tonic-gate /* Allocate the new IRE. */ 1580Sstevel@tonic-gate mp = allocb(sizeof (ire_t), BPRI_MED); 1590Sstevel@tonic-gate if (mp == NULL) { 1600Sstevel@tonic-gate ip1dbg(("ire_create_mp_v6: alloc failed\n")); 1610Sstevel@tonic-gate return (NULL); 1620Sstevel@tonic-gate } 1630Sstevel@tonic-gate 1640Sstevel@tonic-gate ire = (ire_t *)mp->b_rptr; 1650Sstevel@tonic-gate mp->b_wptr = (uchar_t *)&ire[1]; 1660Sstevel@tonic-gate 1670Sstevel@tonic-gate /* Start clean. */ 1680Sstevel@tonic-gate *ire = ire_null; 1690Sstevel@tonic-gate ire->ire_mp = mp; 1700Sstevel@tonic-gate mp->b_datap->db_type = IRE_DB_TYPE; 1710Sstevel@tonic-gate 1720Sstevel@tonic-gate ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway, 1734714Ssowmini NULL, rfq, stq, type, ipif, v6cmask, phandle, 1743448Sdh155122 ihandle, flags, ulp_info, gc, gcgrp, ipst); 1750Sstevel@tonic-gate 1760Sstevel@tonic-gate if (ret_ire == NULL) { 1770Sstevel@tonic-gate freeb(ire->ire_mp); 1780Sstevel@tonic-gate return (NULL); 1790Sstevel@tonic-gate } 1800Sstevel@tonic-gate return (ire); 1810Sstevel@tonic-gate } 1820Sstevel@tonic-gate 1830Sstevel@tonic-gate /* 1840Sstevel@tonic-gate * ire_create_v6 is called to allocate and initialize a new IRE. 1850Sstevel@tonic-gate * 1860Sstevel@tonic-gate * NOTE : This is called as writer sometimes though not required 1870Sstevel@tonic-gate * by this function. 1884714Ssowmini * 1894714Ssowmini * See comments above ire_create_mp_v6() for the rationale behind the 1904714Ssowmini * unused src_nce argument. 1910Sstevel@tonic-gate */ 1924714Ssowmini /* ARGSUSED */ 1930Sstevel@tonic-gate ire_t * 1940Sstevel@tonic-gate ire_create_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, 1950Sstevel@tonic-gate const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 1964714Ssowmini uint_t *max_fragp, nce_t *src_nce, queue_t *rfq, queue_t *stq, 1974714Ssowmini ushort_t type, ipif_t *ipif, const in6_addr_t *v6cmask, 1981676Sjpk uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, 1993448Sdh155122 tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) 2000Sstevel@tonic-gate { 2010Sstevel@tonic-gate ire_t *ire; 2020Sstevel@tonic-gate ire_t *ret_ire; 2030Sstevel@tonic-gate 2040Sstevel@tonic-gate ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr)); 2050Sstevel@tonic-gate 2060Sstevel@tonic-gate ire = kmem_cache_alloc(ire_cache, KM_NOSLEEP); 2070Sstevel@tonic-gate if (ire == NULL) { 2080Sstevel@tonic-gate ip1dbg(("ire_create_v6: alloc failed\n")); 2090Sstevel@tonic-gate return (NULL); 2100Sstevel@tonic-gate } 2110Sstevel@tonic-gate *ire = ire_null; 2120Sstevel@tonic-gate 2130Sstevel@tonic-gate ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway, 2144714Ssowmini max_fragp, rfq, stq, type, ipif, v6cmask, phandle, 2153448Sdh155122 ihandle, flags, ulp_info, gc, gcgrp, ipst); 2160Sstevel@tonic-gate 2170Sstevel@tonic-gate if (ret_ire == NULL) { 2180Sstevel@tonic-gate kmem_cache_free(ire_cache, ire); 2190Sstevel@tonic-gate return (NULL); 2200Sstevel@tonic-gate } 2210Sstevel@tonic-gate ASSERT(ret_ire == ire); 2220Sstevel@tonic-gate return (ire); 2230Sstevel@tonic-gate } 2240Sstevel@tonic-gate 2250Sstevel@tonic-gate /* 2260Sstevel@tonic-gate * Find an IRE_INTERFACE for the multicast group. 2270Sstevel@tonic-gate * Allows different routes for multicast addresses 2280Sstevel@tonic-gate * in the unicast routing table (akin to FF::0/8 but could be more specific) 2290Sstevel@tonic-gate * which point at different interfaces. This is used when IPV6_MULTICAST_IF 2300Sstevel@tonic-gate * isn't specified (when sending) and when IPV6_JOIN_GROUP doesn't 2310Sstevel@tonic-gate * specify the interface to join on. 2320Sstevel@tonic-gate * 2330Sstevel@tonic-gate * Supports link-local addresses by following the ipif/ill when recursing. 2340Sstevel@tonic-gate */ 2350Sstevel@tonic-gate ire_t * 2363448Sdh155122 ire_lookup_multi_v6(const in6_addr_t *group, zoneid_t zoneid, ip_stack_t *ipst) 2370Sstevel@tonic-gate { 2380Sstevel@tonic-gate ire_t *ire; 2390Sstevel@tonic-gate ipif_t *ipif = NULL; 2400Sstevel@tonic-gate int match_flags = MATCH_IRE_TYPE; 2410Sstevel@tonic-gate in6_addr_t gw_addr_v6; 2420Sstevel@tonic-gate 2430Sstevel@tonic-gate ire = ire_ftable_lookup_v6(group, 0, 0, 0, NULL, NULL, 2443448Sdh155122 zoneid, 0, NULL, MATCH_IRE_DEFAULT, ipst); 2450Sstevel@tonic-gate 2460Sstevel@tonic-gate /* We search a resolvable ire in case of multirouting. */ 2470Sstevel@tonic-gate if ((ire != NULL) && (ire->ire_flags & RTF_MULTIRT)) { 2480Sstevel@tonic-gate ire_t *cire = NULL; 2490Sstevel@tonic-gate /* 2500Sstevel@tonic-gate * If the route is not resolvable, the looked up ire 2510Sstevel@tonic-gate * may be changed here. In that case, ire_multirt_lookup() 2520Sstevel@tonic-gate * IRE_REFRELE the original ire and change it. 2530Sstevel@tonic-gate */ 2541676Sjpk (void) ire_multirt_lookup_v6(&cire, &ire, MULTIRT_CACHEGW, 2553448Sdh155122 NULL, ipst); 2560Sstevel@tonic-gate if (cire != NULL) 2570Sstevel@tonic-gate ire_refrele(cire); 2580Sstevel@tonic-gate } 2590Sstevel@tonic-gate if (ire == NULL) 2600Sstevel@tonic-gate return (NULL); 2610Sstevel@tonic-gate /* 2620Sstevel@tonic-gate * Make sure we follow ire_ipif. 2630Sstevel@tonic-gate * 2640Sstevel@tonic-gate * We need to determine the interface route through 2650Sstevel@tonic-gate * which the gateway will be reached. We don't really 2660Sstevel@tonic-gate * care which interface is picked if the interface is 2670Sstevel@tonic-gate * part of a group. 2680Sstevel@tonic-gate */ 2690Sstevel@tonic-gate if (ire->ire_ipif != NULL) { 2700Sstevel@tonic-gate ipif = ire->ire_ipif; 2710Sstevel@tonic-gate match_flags |= MATCH_IRE_ILL_GROUP; 2720Sstevel@tonic-gate } 2730Sstevel@tonic-gate 2740Sstevel@tonic-gate switch (ire->ire_type) { 2750Sstevel@tonic-gate case IRE_DEFAULT: 2760Sstevel@tonic-gate case IRE_PREFIX: 2770Sstevel@tonic-gate case IRE_HOST: 2780Sstevel@tonic-gate mutex_enter(&ire->ire_lock); 2790Sstevel@tonic-gate gw_addr_v6 = ire->ire_gateway_addr_v6; 2800Sstevel@tonic-gate mutex_exit(&ire->ire_lock); 2810Sstevel@tonic-gate ire_refrele(ire); 2820Sstevel@tonic-gate ire = ire_ftable_lookup_v6(&gw_addr_v6, 0, 0, 2830Sstevel@tonic-gate IRE_INTERFACE, ipif, NULL, zoneid, 0, 2843448Sdh155122 NULL, match_flags, ipst); 2850Sstevel@tonic-gate return (ire); 2860Sstevel@tonic-gate case IRE_IF_NORESOLVER: 2870Sstevel@tonic-gate case IRE_IF_RESOLVER: 2880Sstevel@tonic-gate return (ire); 2890Sstevel@tonic-gate default: 2900Sstevel@tonic-gate ire_refrele(ire); 2910Sstevel@tonic-gate return (NULL); 2920Sstevel@tonic-gate } 2930Sstevel@tonic-gate } 2940Sstevel@tonic-gate 2950Sstevel@tonic-gate /* 2960Sstevel@tonic-gate * Return any local address. We use this to target ourselves 2970Sstevel@tonic-gate * when the src address was specified as 'default'. 2980Sstevel@tonic-gate * Preference for IRE_LOCAL entries. 2990Sstevel@tonic-gate */ 3000Sstevel@tonic-gate ire_t * 3013448Sdh155122 ire_lookup_local_v6(zoneid_t zoneid, ip_stack_t *ipst) 3020Sstevel@tonic-gate { 3030Sstevel@tonic-gate ire_t *ire; 3040Sstevel@tonic-gate irb_t *irb; 3050Sstevel@tonic-gate ire_t *maybe = NULL; 3060Sstevel@tonic-gate int i; 3070Sstevel@tonic-gate 3083448Sdh155122 for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { 3093448Sdh155122 irb = &ipst->ips_ip_cache_table_v6[i]; 3100Sstevel@tonic-gate if (irb->irb_ire == NULL) 3110Sstevel@tonic-gate continue; 3120Sstevel@tonic-gate rw_enter(&irb->irb_lock, RW_READER); 3130Sstevel@tonic-gate for (ire = irb->irb_ire; ire; ire = ire->ire_next) { 3140Sstevel@tonic-gate if ((ire->ire_marks & IRE_MARK_CONDEMNED) || 3151676Sjpk ire->ire_zoneid != zoneid && 3161676Sjpk ire->ire_zoneid != ALL_ZONES) 3170Sstevel@tonic-gate continue; 3180Sstevel@tonic-gate switch (ire->ire_type) { 3190Sstevel@tonic-gate case IRE_LOOPBACK: 3200Sstevel@tonic-gate if (maybe == NULL) { 3210Sstevel@tonic-gate IRE_REFHOLD(ire); 3220Sstevel@tonic-gate maybe = ire; 3230Sstevel@tonic-gate } 3240Sstevel@tonic-gate break; 3250Sstevel@tonic-gate case IRE_LOCAL: 3260Sstevel@tonic-gate if (maybe != NULL) { 3270Sstevel@tonic-gate ire_refrele(maybe); 3280Sstevel@tonic-gate } 3290Sstevel@tonic-gate IRE_REFHOLD(ire); 3300Sstevel@tonic-gate rw_exit(&irb->irb_lock); 3310Sstevel@tonic-gate return (ire); 3320Sstevel@tonic-gate } 3330Sstevel@tonic-gate } 3340Sstevel@tonic-gate rw_exit(&irb->irb_lock); 3350Sstevel@tonic-gate } 3360Sstevel@tonic-gate return (maybe); 3370Sstevel@tonic-gate } 3380Sstevel@tonic-gate 3390Sstevel@tonic-gate /* 3400Sstevel@tonic-gate * This function takes a mask and returns number of bits set in the 3410Sstevel@tonic-gate * mask (the represented prefix length). Assumes a contiguous mask. 3420Sstevel@tonic-gate */ 3430Sstevel@tonic-gate int 3440Sstevel@tonic-gate ip_mask_to_plen_v6(const in6_addr_t *v6mask) 3450Sstevel@tonic-gate { 3460Sstevel@tonic-gate int bits; 3470Sstevel@tonic-gate int plen = IPV6_ABITS; 3480Sstevel@tonic-gate int i; 3490Sstevel@tonic-gate 3500Sstevel@tonic-gate for (i = 3; i >= 0; i--) { 3510Sstevel@tonic-gate if (v6mask->s6_addr32[i] == 0) { 3520Sstevel@tonic-gate plen -= 32; 3530Sstevel@tonic-gate continue; 3540Sstevel@tonic-gate } 3550Sstevel@tonic-gate bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1; 3560Sstevel@tonic-gate if (bits == 0) 3570Sstevel@tonic-gate break; 3580Sstevel@tonic-gate plen -= bits; 3590Sstevel@tonic-gate } 3600Sstevel@tonic-gate 3610Sstevel@tonic-gate return (plen); 3620Sstevel@tonic-gate } 3630Sstevel@tonic-gate 3640Sstevel@tonic-gate /* 3650Sstevel@tonic-gate * Convert a prefix length to the mask for that prefix. 3660Sstevel@tonic-gate * Returns the argument bitmask. 3670Sstevel@tonic-gate */ 3680Sstevel@tonic-gate in6_addr_t * 3690Sstevel@tonic-gate ip_plen_to_mask_v6(uint_t plen, in6_addr_t *bitmask) 3700Sstevel@tonic-gate { 3710Sstevel@tonic-gate uint32_t *ptr; 3720Sstevel@tonic-gate 3730Sstevel@tonic-gate if (plen < 0 || plen > IPV6_ABITS) 3740Sstevel@tonic-gate return (NULL); 3750Sstevel@tonic-gate *bitmask = ipv6_all_zeros; 3760Sstevel@tonic-gate 3770Sstevel@tonic-gate ptr = (uint32_t *)bitmask; 3780Sstevel@tonic-gate while (plen > 32) { 3790Sstevel@tonic-gate *ptr++ = 0xffffffffU; 3800Sstevel@tonic-gate plen -= 32; 3810Sstevel@tonic-gate } 3820Sstevel@tonic-gate *ptr = htonl(0xffffffffU << (32 - plen)); 3830Sstevel@tonic-gate return (bitmask); 3840Sstevel@tonic-gate } 3850Sstevel@tonic-gate 3860Sstevel@tonic-gate /* 3870Sstevel@tonic-gate * Add a fully initialized IRE to an appropriate 3880Sstevel@tonic-gate * table based on ire_type. 3890Sstevel@tonic-gate * 3903004Sdd193516 * The forward table contains IRE_PREFIX/IRE_HOST/IRE_HOST and 3910Sstevel@tonic-gate * IRE_IF_RESOLVER/IRE_IF_NORESOLVER and IRE_DEFAULT. 3920Sstevel@tonic-gate * 3930Sstevel@tonic-gate * The cache table contains IRE_BROADCAST/IRE_LOCAL/IRE_LOOPBACK 3940Sstevel@tonic-gate * and IRE_CACHE. 3950Sstevel@tonic-gate * 3960Sstevel@tonic-gate * NOTE : This function is called as writer though not required 3970Sstevel@tonic-gate * by this function. 3980Sstevel@tonic-gate */ 3990Sstevel@tonic-gate int 4000Sstevel@tonic-gate ire_add_v6(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func) 4010Sstevel@tonic-gate { 4020Sstevel@tonic-gate ire_t *ire1; 4030Sstevel@tonic-gate int mask_table_index; 4040Sstevel@tonic-gate irb_t *irb_ptr; 4050Sstevel@tonic-gate ire_t **irep; 4060Sstevel@tonic-gate int flags; 4070Sstevel@tonic-gate ire_t *pire = NULL; 4080Sstevel@tonic-gate ill_t *stq_ill; 4090Sstevel@tonic-gate boolean_t ndp_g_lock_held = B_FALSE; 4100Sstevel@tonic-gate ire_t *ire = *ire_p; 4110Sstevel@tonic-gate int error; 4123448Sdh155122 ip_stack_t *ipst = ire->ire_ipst; 4130Sstevel@tonic-gate 4140Sstevel@tonic-gate ASSERT(ire->ire_ipversion == IPV6_VERSION); 4150Sstevel@tonic-gate ASSERT(ire->ire_mp == NULL); /* Calls should go through ire_add */ 4160Sstevel@tonic-gate ASSERT(ire->ire_nce == NULL); 4170Sstevel@tonic-gate 4180Sstevel@tonic-gate /* Find the appropriate list head. */ 4190Sstevel@tonic-gate switch (ire->ire_type) { 4200Sstevel@tonic-gate case IRE_HOST: 4210Sstevel@tonic-gate ire->ire_mask_v6 = ipv6_all_ones; 4220Sstevel@tonic-gate ire->ire_masklen = IPV6_ABITS; 4230Sstevel@tonic-gate if ((ire->ire_flags & RTF_SETSRC) == 0) 4240Sstevel@tonic-gate ire->ire_src_addr_v6 = ipv6_all_zeros; 4250Sstevel@tonic-gate break; 4260Sstevel@tonic-gate case IRE_CACHE: 4270Sstevel@tonic-gate case IRE_LOCAL: 4280Sstevel@tonic-gate case IRE_LOOPBACK: 4290Sstevel@tonic-gate ire->ire_mask_v6 = ipv6_all_ones; 4300Sstevel@tonic-gate ire->ire_masklen = IPV6_ABITS; 4310Sstevel@tonic-gate break; 4320Sstevel@tonic-gate case IRE_PREFIX: 4330Sstevel@tonic-gate if ((ire->ire_flags & RTF_SETSRC) == 0) 4340Sstevel@tonic-gate ire->ire_src_addr_v6 = ipv6_all_zeros; 4350Sstevel@tonic-gate break; 4360Sstevel@tonic-gate case IRE_DEFAULT: 4370Sstevel@tonic-gate if ((ire->ire_flags & RTF_SETSRC) == 0) 4380Sstevel@tonic-gate ire->ire_src_addr_v6 = ipv6_all_zeros; 4390Sstevel@tonic-gate break; 4400Sstevel@tonic-gate case IRE_IF_RESOLVER: 4410Sstevel@tonic-gate case IRE_IF_NORESOLVER: 4420Sstevel@tonic-gate break; 4430Sstevel@tonic-gate default: 4440Sstevel@tonic-gate printf("ire_add_v6: ire %p has unrecognized IRE type (%d)\n", 4450Sstevel@tonic-gate (void *)ire, ire->ire_type); 4460Sstevel@tonic-gate ire_delete(ire); 4470Sstevel@tonic-gate *ire_p = NULL; 4480Sstevel@tonic-gate return (EINVAL); 4490Sstevel@tonic-gate } 4500Sstevel@tonic-gate 4510Sstevel@tonic-gate /* Make sure the address is properly masked. */ 4520Sstevel@tonic-gate V6_MASK_COPY(ire->ire_addr_v6, ire->ire_mask_v6, ire->ire_addr_v6); 4530Sstevel@tonic-gate 4540Sstevel@tonic-gate if ((ire->ire_type & IRE_CACHETABLE) == 0) { 4550Sstevel@tonic-gate /* IRE goes into Forward Table */ 4560Sstevel@tonic-gate mask_table_index = ip_mask_to_plen_v6(&ire->ire_mask_v6); 4573448Sdh155122 if ((ipst->ips_ip_forwarding_table_v6[mask_table_index]) == 4583448Sdh155122 NULL) { 4590Sstevel@tonic-gate irb_t *ptr; 4600Sstevel@tonic-gate int i; 4610Sstevel@tonic-gate 4623448Sdh155122 ptr = (irb_t *)mi_zalloc(( 4633448Sdh155122 ipst->ips_ip6_ftable_hash_size * sizeof (irb_t))); 4640Sstevel@tonic-gate if (ptr == NULL) { 4650Sstevel@tonic-gate ire_delete(ire); 4660Sstevel@tonic-gate *ire_p = NULL; 4670Sstevel@tonic-gate return (ENOMEM); 4680Sstevel@tonic-gate } 4693448Sdh155122 for (i = 0; i < ipst->ips_ip6_ftable_hash_size; i++) { 4700Sstevel@tonic-gate rw_init(&ptr[i].irb_lock, NULL, 4710Sstevel@tonic-gate RW_DEFAULT, NULL); 4720Sstevel@tonic-gate } 4733448Sdh155122 mutex_enter(&ipst->ips_ire_ft_init_lock); 4743448Sdh155122 if (ipst->ips_ip_forwarding_table_v6[ 4753448Sdh155122 mask_table_index] == NULL) { 4763448Sdh155122 ipst->ips_ip_forwarding_table_v6[ 4773448Sdh155122 mask_table_index] = ptr; 4783448Sdh155122 mutex_exit(&ipst->ips_ire_ft_init_lock); 4790Sstevel@tonic-gate } else { 4800Sstevel@tonic-gate /* 4810Sstevel@tonic-gate * Some other thread won the race in 4820Sstevel@tonic-gate * initializing the forwarding table at the 4830Sstevel@tonic-gate * same index. 4840Sstevel@tonic-gate */ 4853448Sdh155122 mutex_exit(&ipst->ips_ire_ft_init_lock); 4863448Sdh155122 for (i = 0; i < ipst->ips_ip6_ftable_hash_size; 4873448Sdh155122 i++) { 4880Sstevel@tonic-gate rw_destroy(&ptr[i].irb_lock); 4890Sstevel@tonic-gate } 4900Sstevel@tonic-gate mi_free(ptr); 4910Sstevel@tonic-gate } 4920Sstevel@tonic-gate } 4933448Sdh155122 irb_ptr = &(ipst->ips_ip_forwarding_table_v6[mask_table_index][ 4940Sstevel@tonic-gate IRE_ADDR_MASK_HASH_V6(ire->ire_addr_v6, ire->ire_mask_v6, 4953448Sdh155122 ipst->ips_ip6_ftable_hash_size)]); 4960Sstevel@tonic-gate } else { 4973448Sdh155122 irb_ptr = &(ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6( 4983448Sdh155122 ire->ire_addr_v6, ipst->ips_ip6_cache_table_size)]); 4990Sstevel@tonic-gate } 5000Sstevel@tonic-gate /* 5010Sstevel@tonic-gate * For xresolv interfaces (v6 interfaces with an external 5020Sstevel@tonic-gate * address resolver), ip_newroute_v6/ip_newroute_ipif_v6 5030Sstevel@tonic-gate * are unable to prevent the deletion of the interface route 5040Sstevel@tonic-gate * while adding an IRE_CACHE for an on-link destination 5050Sstevel@tonic-gate * in the IRE_IF_RESOLVER case, since the ire has to go to 5060Sstevel@tonic-gate * the external resolver and return. We can't do a REFHOLD on the 5070Sstevel@tonic-gate * associated interface ire for fear of the message being freed 5080Sstevel@tonic-gate * if the external resolver can't resolve the address. 5090Sstevel@tonic-gate * Here we look up the interface ire in the forwarding table 5100Sstevel@tonic-gate * and make sure that the interface route has not been deleted. 5110Sstevel@tonic-gate */ 5120Sstevel@tonic-gate if (ire->ire_type == IRE_CACHE && 5130Sstevel@tonic-gate IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6) && 5140Sstevel@tonic-gate (((ill_t *)ire->ire_stq->q_ptr)->ill_net_type == IRE_IF_RESOLVER) && 5150Sstevel@tonic-gate (((ill_t *)ire->ire_stq->q_ptr)->ill_flags & ILLF_XRESOLV)) { 5160Sstevel@tonic-gate 5170Sstevel@tonic-gate pire = ire_ihandle_lookup_onlink_v6(ire); 5180Sstevel@tonic-gate if (pire == NULL) { 5190Sstevel@tonic-gate ire_delete(ire); 5200Sstevel@tonic-gate *ire_p = NULL; 5210Sstevel@tonic-gate return (EINVAL); 5220Sstevel@tonic-gate } 5230Sstevel@tonic-gate /* Prevent pire from getting deleted */ 5240Sstevel@tonic-gate IRB_REFHOLD(pire->ire_bucket); 5250Sstevel@tonic-gate /* Has it been removed already? */ 5260Sstevel@tonic-gate if (pire->ire_marks & IRE_MARK_CONDEMNED) { 5270Sstevel@tonic-gate IRB_REFRELE(pire->ire_bucket); 5280Sstevel@tonic-gate ire_refrele(pire); 5290Sstevel@tonic-gate ire_delete(ire); 5300Sstevel@tonic-gate *ire_p = NULL; 5310Sstevel@tonic-gate return (EINVAL); 5320Sstevel@tonic-gate } 5330Sstevel@tonic-gate } 5340Sstevel@tonic-gate 5350Sstevel@tonic-gate flags = (MATCH_IRE_MASK | MATCH_IRE_TYPE | MATCH_IRE_GW); 5360Sstevel@tonic-gate /* 5370Sstevel@tonic-gate * For IRE_CACHES, MATCH_IRE_IPIF is not enough to check 5380Sstevel@tonic-gate * for duplicates because : 5390Sstevel@tonic-gate * 5400Sstevel@tonic-gate * 1) ire_ipif->ipif_ill and ire_stq->q_ptr could be 5410Sstevel@tonic-gate * pointing at different ills. A real duplicate is 5420Sstevel@tonic-gate * a match on both ire_ipif and ire_stq. 5430Sstevel@tonic-gate * 5440Sstevel@tonic-gate * 2) We could have multiple packets trying to create 5450Sstevel@tonic-gate * an IRE_CACHE for the same ill. 5460Sstevel@tonic-gate * 5470Sstevel@tonic-gate * Moreover, IPIF_NOFAILOVER and IPV6_BOUND_PIF endpoints wants 5480Sstevel@tonic-gate * to go out on a particular ill. Rather than looking at the 5490Sstevel@tonic-gate * packet, we depend on the above for MATCH_IRE_ILL here. 5500Sstevel@tonic-gate * 5510Sstevel@tonic-gate * Unlike IPv4, MATCH_IRE_IPIF is needed here as we could have 5520Sstevel@tonic-gate * multiple IRE_CACHES for an ill for the same destination 5530Sstevel@tonic-gate * with various scoped addresses i.e represented by ipifs. 5540Sstevel@tonic-gate * 5550Sstevel@tonic-gate * MATCH_IRE_ILL is done implicitly below for IRE_CACHES. 5560Sstevel@tonic-gate */ 5570Sstevel@tonic-gate if (ire->ire_ipif != NULL) 5580Sstevel@tonic-gate flags |= MATCH_IRE_IPIF; 5590Sstevel@tonic-gate /* 5600Sstevel@tonic-gate * If we are creating hidden ires, make sure we search on 5610Sstevel@tonic-gate * this ill (MATCH_IRE_ILL) and a hidden ire, while we are 5620Sstevel@tonic-gate * searching for duplicates below. Otherwise we could 5630Sstevel@tonic-gate * potentially find an IRE on some other interface 5640Sstevel@tonic-gate * and it may not be a IRE marked with IRE_MARK_HIDDEN. We 5650Sstevel@tonic-gate * shouldn't do this as this will lead to an infinite loop as 5660Sstevel@tonic-gate * eventually we need an hidden ire for this packet to go 5670Sstevel@tonic-gate * out. MATCH_IRE_ILL is already marked above. 5680Sstevel@tonic-gate */ 5690Sstevel@tonic-gate if (ire->ire_marks & IRE_MARK_HIDDEN) { 5700Sstevel@tonic-gate ASSERT(ire->ire_type == IRE_CACHE); 5710Sstevel@tonic-gate flags |= MATCH_IRE_MARK_HIDDEN; 5720Sstevel@tonic-gate } 5730Sstevel@tonic-gate 5740Sstevel@tonic-gate /* 5750Sstevel@tonic-gate * Start the atomic add of the ire. Grab the ill locks, 5760Sstevel@tonic-gate * ill_g_usesrc_lock and the bucket lock. Check for condemned. 5772535Ssangeeta * To avoid lock order problems, get the ndp6.ndp_g_lock now itself. 5780Sstevel@tonic-gate */ 5790Sstevel@tonic-gate if (ire->ire_type == IRE_CACHE) { 5803448Sdh155122 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 5810Sstevel@tonic-gate ndp_g_lock_held = B_TRUE; 5820Sstevel@tonic-gate } 5830Sstevel@tonic-gate 5840Sstevel@tonic-gate /* 5850Sstevel@tonic-gate * If ipif or ill is changing ire_atomic_start() may queue the 5860Sstevel@tonic-gate * request and return EINPROGRESS. 5870Sstevel@tonic-gate */ 5880Sstevel@tonic-gate 5890Sstevel@tonic-gate error = ire_atomic_start(irb_ptr, ire, q, mp, func); 5900Sstevel@tonic-gate if (error != 0) { 5910Sstevel@tonic-gate if (ndp_g_lock_held) 5923448Sdh155122 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 5930Sstevel@tonic-gate /* 5940Sstevel@tonic-gate * We don't know whether it is a valid ipif or not. 5950Sstevel@tonic-gate * So, set it to NULL. This assumes that the ire has not added 5960Sstevel@tonic-gate * a reference to the ipif. 5970Sstevel@tonic-gate */ 5980Sstevel@tonic-gate ire->ire_ipif = NULL; 5990Sstevel@tonic-gate ire_delete(ire); 6000Sstevel@tonic-gate if (pire != NULL) { 6010Sstevel@tonic-gate IRB_REFRELE(pire->ire_bucket); 6020Sstevel@tonic-gate ire_refrele(pire); 6030Sstevel@tonic-gate } 6040Sstevel@tonic-gate *ire_p = NULL; 6050Sstevel@tonic-gate return (error); 6060Sstevel@tonic-gate } 6070Sstevel@tonic-gate /* 6080Sstevel@tonic-gate * To avoid creating ires having stale values for the ire_max_frag 6090Sstevel@tonic-gate * we get the latest value atomically here. For more details 6100Sstevel@tonic-gate * see the block comment in ip_sioctl_mtu and in DL_NOTE_SDU_CHANGE 6110Sstevel@tonic-gate * in ip_rput_dlpi_writer 6120Sstevel@tonic-gate */ 6130Sstevel@tonic-gate if (ire->ire_max_fragp == NULL) { 6140Sstevel@tonic-gate if (IN6_IS_ADDR_MULTICAST(&ire->ire_addr_v6)) 6150Sstevel@tonic-gate ire->ire_max_frag = ire->ire_ipif->ipif_mtu; 6160Sstevel@tonic-gate else 6170Sstevel@tonic-gate ire->ire_max_frag = pire->ire_max_frag; 6180Sstevel@tonic-gate } else { 6190Sstevel@tonic-gate uint_t max_frag; 6200Sstevel@tonic-gate 6210Sstevel@tonic-gate max_frag = *ire->ire_max_fragp; 6220Sstevel@tonic-gate ire->ire_max_fragp = NULL; 6230Sstevel@tonic-gate ire->ire_max_frag = max_frag; 6240Sstevel@tonic-gate } 6250Sstevel@tonic-gate 6260Sstevel@tonic-gate /* 6270Sstevel@tonic-gate * Atomically check for duplicate and insert in the table. 6280Sstevel@tonic-gate */ 6290Sstevel@tonic-gate for (ire1 = irb_ptr->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) { 6300Sstevel@tonic-gate if (ire1->ire_marks & IRE_MARK_CONDEMNED) 6310Sstevel@tonic-gate continue; 6320Sstevel@tonic-gate 6330Sstevel@tonic-gate if (ire->ire_type == IRE_CACHE) { 6340Sstevel@tonic-gate /* 6350Sstevel@tonic-gate * We do MATCH_IRE_ILL implicitly here for IRE_CACHES. 6360Sstevel@tonic-gate * As ire_ipif and ire_stq could point to two 6370Sstevel@tonic-gate * different ills, we can't pass just ire_ipif to 6380Sstevel@tonic-gate * ire_match_args and get a match on both ills. 6390Sstevel@tonic-gate * This is just needed for duplicate checks here and 6400Sstevel@tonic-gate * so we don't add an extra argument to 6410Sstevel@tonic-gate * ire_match_args for this. Do it locally. 6420Sstevel@tonic-gate * 6430Sstevel@tonic-gate * NOTE : Currently there is no part of the code 6440Sstevel@tonic-gate * that asks for both MATH_IRE_IPIF and MATCH_IRE_ILL 6450Sstevel@tonic-gate * match for IRE_CACHEs. Thus we don't want to 6460Sstevel@tonic-gate * extend the arguments to ire_match_args_v6. 6470Sstevel@tonic-gate */ 6480Sstevel@tonic-gate if (ire1->ire_stq != ire->ire_stq) 6490Sstevel@tonic-gate continue; 6500Sstevel@tonic-gate /* 6510Sstevel@tonic-gate * Multiroute IRE_CACHEs for a given destination can 6520Sstevel@tonic-gate * have the same ire_ipif, typically if their source 6530Sstevel@tonic-gate * address is forced using RTF_SETSRC, and the same 6540Sstevel@tonic-gate * send-to queue. We differentiate them using the parent 6550Sstevel@tonic-gate * handle. 6560Sstevel@tonic-gate */ 6570Sstevel@tonic-gate if ((ire1->ire_flags & RTF_MULTIRT) && 6580Sstevel@tonic-gate (ire->ire_flags & RTF_MULTIRT) && 6590Sstevel@tonic-gate (ire1->ire_phandle != ire->ire_phandle)) 6600Sstevel@tonic-gate continue; 6610Sstevel@tonic-gate } 6620Sstevel@tonic-gate if (ire1->ire_zoneid != ire->ire_zoneid) 6630Sstevel@tonic-gate continue; 6640Sstevel@tonic-gate if (ire_match_args_v6(ire1, &ire->ire_addr_v6, 6650Sstevel@tonic-gate &ire->ire_mask_v6, &ire->ire_gateway_addr_v6, 6661676Sjpk ire->ire_type, ire->ire_ipif, ire->ire_zoneid, 0, NULL, 6671676Sjpk flags)) { 6680Sstevel@tonic-gate /* 6690Sstevel@tonic-gate * Return the old ire after doing a REFHOLD. 6700Sstevel@tonic-gate * As most of the callers continue to use the IRE 6710Sstevel@tonic-gate * after adding, we return a held ire. This will 6720Sstevel@tonic-gate * avoid a lookup in the caller again. If the callers 6730Sstevel@tonic-gate * don't want to use it, they need to do a REFRELE. 6740Sstevel@tonic-gate */ 6750Sstevel@tonic-gate ip1dbg(("found dup ire existing %p new %p", 6760Sstevel@tonic-gate (void *)ire1, (void *)ire)); 6770Sstevel@tonic-gate IRE_REFHOLD(ire1); 6780Sstevel@tonic-gate if (ndp_g_lock_held) 6793448Sdh155122 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 6800Sstevel@tonic-gate ire_atomic_end(irb_ptr, ire); 6810Sstevel@tonic-gate ire_delete(ire); 6820Sstevel@tonic-gate if (pire != NULL) { 6830Sstevel@tonic-gate /* 6840Sstevel@tonic-gate * Assert that it is 6850Sstevel@tonic-gate * not yet removed from the list. 6860Sstevel@tonic-gate */ 6870Sstevel@tonic-gate ASSERT(pire->ire_ptpn != NULL); 6880Sstevel@tonic-gate IRB_REFRELE(pire->ire_bucket); 6890Sstevel@tonic-gate ire_refrele(pire); 6900Sstevel@tonic-gate } 6910Sstevel@tonic-gate *ire_p = ire1; 6920Sstevel@tonic-gate return (0); 6930Sstevel@tonic-gate } 6940Sstevel@tonic-gate } 6950Sstevel@tonic-gate if (ire->ire_type == IRE_CACHE) { 6960Sstevel@tonic-gate in6_addr_t gw_addr_v6; 6970Sstevel@tonic-gate ill_t *ill = ire_to_ill(ire); 6980Sstevel@tonic-gate char buf[INET6_ADDRSTRLEN]; 6990Sstevel@tonic-gate nce_t *nce; 7000Sstevel@tonic-gate 7010Sstevel@tonic-gate /* 7020Sstevel@tonic-gate * All IRE_CACHE types must have a nce. If this is 7030Sstevel@tonic-gate * not the case the entry will not be added. We need 7040Sstevel@tonic-gate * to make sure that if somebody deletes the nce 7050Sstevel@tonic-gate * after we looked up, they will find this ire and 7060Sstevel@tonic-gate * delete the ire. To delete this ire one needs the 7070Sstevel@tonic-gate * bucket lock which we are still holding here. So, 7080Sstevel@tonic-gate * even if the nce gets deleted after we looked up, 7090Sstevel@tonic-gate * this ire will get deleted. 7100Sstevel@tonic-gate * 7110Sstevel@tonic-gate * NOTE : Don't need the ire_lock for accessing 7120Sstevel@tonic-gate * ire_gateway_addr_v6 as it is appearing first 7130Sstevel@tonic-gate * time on the list and rts_setgwr_v6 could not 7140Sstevel@tonic-gate * be changing this. 7150Sstevel@tonic-gate */ 7160Sstevel@tonic-gate gw_addr_v6 = ire->ire_gateway_addr_v6; 7170Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7182535Ssangeeta nce = ndp_lookup_v6(ill, &ire->ire_addr_v6, B_TRUE); 7190Sstevel@tonic-gate } else { 7202535Ssangeeta nce = ndp_lookup_v6(ill, &gw_addr_v6, B_TRUE); 7210Sstevel@tonic-gate } 7220Sstevel@tonic-gate if (nce == NULL) 7230Sstevel@tonic-gate goto failed; 7240Sstevel@tonic-gate 7250Sstevel@tonic-gate /* Pair of refhold, refrele just to get the tracing right */ 7262535Ssangeeta NCE_REFHOLD_TO_REFHOLD_NOTR(nce); 7270Sstevel@tonic-gate /* 7280Sstevel@tonic-gate * Atomically make sure that new IREs don't point 7290Sstevel@tonic-gate * to an NCE that is logically deleted (CONDEMNED). 7300Sstevel@tonic-gate * ndp_delete() first marks the NCE CONDEMNED. 7310Sstevel@tonic-gate * This ensures that the nce_refcnt won't increase 7320Sstevel@tonic-gate * due to new nce_lookups or due to addition of new IREs 7330Sstevel@tonic-gate * pointing to this NCE. Then ndp_delete() cleans up 7340Sstevel@tonic-gate * existing references. If we don't do it atomically here, 7350Sstevel@tonic-gate * ndp_delete() -> nce_ire_delete() will not be able to 7360Sstevel@tonic-gate * clean up the IRE list completely, and the nce_refcnt 7370Sstevel@tonic-gate * won't go down to zero. 7380Sstevel@tonic-gate */ 7390Sstevel@tonic-gate mutex_enter(&nce->nce_lock); 7400Sstevel@tonic-gate if (ill->ill_flags & ILLF_XRESOLV) { 7410Sstevel@tonic-gate /* 7420Sstevel@tonic-gate * If we used an external resolver, we may not 7430Sstevel@tonic-gate * have gone through neighbor discovery to get here. 7440Sstevel@tonic-gate * Must update the nce_state before the next check. 7450Sstevel@tonic-gate */ 7460Sstevel@tonic-gate if (nce->nce_state == ND_INCOMPLETE) 7470Sstevel@tonic-gate nce->nce_state = ND_REACHABLE; 7480Sstevel@tonic-gate } 7490Sstevel@tonic-gate if (nce->nce_state == ND_INCOMPLETE || 7500Sstevel@tonic-gate (nce->nce_flags & NCE_F_CONDEMNED) || 7510Sstevel@tonic-gate (nce->nce_state == ND_UNREACHABLE)) { 7520Sstevel@tonic-gate failed: 7530Sstevel@tonic-gate if (ndp_g_lock_held) 7543448Sdh155122 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 7550Sstevel@tonic-gate if (nce != NULL) 7560Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 7570Sstevel@tonic-gate ire_atomic_end(irb_ptr, ire); 7580Sstevel@tonic-gate ip1dbg(("ire_add_v6: No nce for dst %s \n", 7590Sstevel@tonic-gate inet_ntop(AF_INET6, &ire->ire_addr_v6, 7600Sstevel@tonic-gate buf, sizeof (buf)))); 7610Sstevel@tonic-gate ire_delete(ire); 7620Sstevel@tonic-gate if (pire != NULL) { 7630Sstevel@tonic-gate /* 7640Sstevel@tonic-gate * Assert that it is 7650Sstevel@tonic-gate * not yet removed from the list. 7660Sstevel@tonic-gate */ 7670Sstevel@tonic-gate ASSERT(pire->ire_ptpn != NULL); 7680Sstevel@tonic-gate IRB_REFRELE(pire->ire_bucket); 7690Sstevel@tonic-gate ire_refrele(pire); 7700Sstevel@tonic-gate } 7710Sstevel@tonic-gate if (nce != NULL) 7720Sstevel@tonic-gate NCE_REFRELE_NOTR(nce); 7730Sstevel@tonic-gate *ire_p = NULL; 7740Sstevel@tonic-gate return (EINVAL); 7750Sstevel@tonic-gate } else { 7760Sstevel@tonic-gate ire->ire_nce = nce; 7770Sstevel@tonic-gate } 7780Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 7790Sstevel@tonic-gate } 7800Sstevel@tonic-gate /* 7810Sstevel@tonic-gate * Find the first entry that matches ire_addr - provides 7820Sstevel@tonic-gate * tail insertion. *irep will be null if no match. 7830Sstevel@tonic-gate */ 7840Sstevel@tonic-gate irep = (ire_t **)irb_ptr; 7850Sstevel@tonic-gate while ((ire1 = *irep) != NULL && 7860Sstevel@tonic-gate !IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &ire1->ire_addr_v6)) 7870Sstevel@tonic-gate irep = &ire1->ire_next; 7880Sstevel@tonic-gate ASSERT(!(ire->ire_type & IRE_BROADCAST)); 7890Sstevel@tonic-gate 7900Sstevel@tonic-gate if (*irep != NULL) { 7910Sstevel@tonic-gate /* 7920Sstevel@tonic-gate * Find the last ire which matches ire_addr_v6. 7930Sstevel@tonic-gate * Needed to do tail insertion among entries with the same 7940Sstevel@tonic-gate * ire_addr_v6. 7950Sstevel@tonic-gate */ 7960Sstevel@tonic-gate while (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 7970Sstevel@tonic-gate &ire1->ire_addr_v6)) { 7980Sstevel@tonic-gate irep = &ire1->ire_next; 7990Sstevel@tonic-gate ire1 = *irep; 8000Sstevel@tonic-gate if (ire1 == NULL) 8010Sstevel@tonic-gate break; 8020Sstevel@tonic-gate } 8030Sstevel@tonic-gate } 8040Sstevel@tonic-gate 8050Sstevel@tonic-gate if (ire->ire_type == IRE_DEFAULT) { 8060Sstevel@tonic-gate /* 8070Sstevel@tonic-gate * We keep a count of default gateways which is used when 8080Sstevel@tonic-gate * assigning them as routes. 8090Sstevel@tonic-gate */ 8103448Sdh155122 ipst->ips_ipv6_ire_default_count++; 8113448Sdh155122 ASSERT(ipst->ips_ipv6_ire_default_count != 0); /* Wraparound */ 8120Sstevel@tonic-gate } 8130Sstevel@tonic-gate /* Insert at *irep */ 8140Sstevel@tonic-gate ire1 = *irep; 8150Sstevel@tonic-gate if (ire1 != NULL) 8160Sstevel@tonic-gate ire1->ire_ptpn = &ire->ire_next; 8170Sstevel@tonic-gate ire->ire_next = ire1; 8180Sstevel@tonic-gate /* Link the new one in. */ 8190Sstevel@tonic-gate ire->ire_ptpn = irep; 8200Sstevel@tonic-gate /* 8210Sstevel@tonic-gate * ire_walk routines de-reference ire_next without holding 8220Sstevel@tonic-gate * a lock. Before we point to the new ire, we want to make 8230Sstevel@tonic-gate * sure the store that sets the ire_next of the new ire 8240Sstevel@tonic-gate * reaches global visibility, so that ire_walk routines 8250Sstevel@tonic-gate * don't see a truncated list of ires i.e if the ire_next 8260Sstevel@tonic-gate * of the new ire gets set after we do "*irep = ire" due 8270Sstevel@tonic-gate * to re-ordering, the ire_walk thread will see a NULL 8280Sstevel@tonic-gate * once it accesses the ire_next of the new ire. 8290Sstevel@tonic-gate * membar_producer() makes sure that the following store 8300Sstevel@tonic-gate * happens *after* all of the above stores. 8310Sstevel@tonic-gate */ 8320Sstevel@tonic-gate membar_producer(); 8330Sstevel@tonic-gate *irep = ire; 8340Sstevel@tonic-gate ire->ire_bucket = irb_ptr; 8350Sstevel@tonic-gate /* 8360Sstevel@tonic-gate * We return a bumped up IRE above. Keep it symmetrical 8370Sstevel@tonic-gate * so that the callers will always have to release. This 8380Sstevel@tonic-gate * helps the callers of this function because they continue 8390Sstevel@tonic-gate * to use the IRE after adding and hence they don't have to 8400Sstevel@tonic-gate * lookup again after we return the IRE. 8410Sstevel@tonic-gate * 8420Sstevel@tonic-gate * NOTE : We don't have to use atomics as this is appearing 8430Sstevel@tonic-gate * in the list for the first time and no one else can bump 8440Sstevel@tonic-gate * up the reference count on this yet. 8450Sstevel@tonic-gate */ 8460Sstevel@tonic-gate IRE_REFHOLD_LOCKED(ire); 8473448Sdh155122 BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_inserted); 8480Sstevel@tonic-gate irb_ptr->irb_ire_cnt++; 8490Sstevel@tonic-gate if (ire->ire_marks & IRE_MARK_TEMPORARY) 8500Sstevel@tonic-gate irb_ptr->irb_tmp_ire_cnt++; 8510Sstevel@tonic-gate 8520Sstevel@tonic-gate if (ire->ire_ipif != NULL) { 853*6255Ssowmini DTRACE_PROBE3(ipif__incr__cnt, (ipif_t *), ire->ire_ipif, 854*6255Ssowmini (char *), "ire", (void *), ire); 855*6255Ssowmini ire->ire_ipif->ipif_cnt_ire++; 8560Sstevel@tonic-gate if (ire->ire_stq != NULL) { 8570Sstevel@tonic-gate stq_ill = (ill_t *)ire->ire_stq->q_ptr; 858*6255Ssowmini DTRACE_PROBE3(ill__incr__cnt, (ill_t *), stq_ill, 859*6255Ssowmini (char *), "ire", (void *), ire); 860*6255Ssowmini stq_ill->ill_cnt_ire++; 8610Sstevel@tonic-gate } 8620Sstevel@tonic-gate } else { 8630Sstevel@tonic-gate ASSERT(ire->ire_stq == NULL); 8640Sstevel@tonic-gate } 8650Sstevel@tonic-gate 8660Sstevel@tonic-gate if (ndp_g_lock_held) 8673448Sdh155122 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 8680Sstevel@tonic-gate ire_atomic_end(irb_ptr, ire); 8690Sstevel@tonic-gate 8700Sstevel@tonic-gate if (pire != NULL) { 8710Sstevel@tonic-gate /* Assert that it is not removed from the list yet */ 8720Sstevel@tonic-gate ASSERT(pire->ire_ptpn != NULL); 8730Sstevel@tonic-gate IRB_REFRELE(pire->ire_bucket); 8740Sstevel@tonic-gate ire_refrele(pire); 8750Sstevel@tonic-gate } 8760Sstevel@tonic-gate 8770Sstevel@tonic-gate if (ire->ire_type != IRE_CACHE) { 8780Sstevel@tonic-gate /* 8790Sstevel@tonic-gate * For ire's with with host mask see if there is an entry 8800Sstevel@tonic-gate * in the cache. If there is one flush the whole cache as 8810Sstevel@tonic-gate * there might be multiple entries due to RTF_MULTIRT (CGTP). 8820Sstevel@tonic-gate * If no entry is found than there is no need to flush the 8830Sstevel@tonic-gate * cache. 8840Sstevel@tonic-gate */ 8850Sstevel@tonic-gate 8860Sstevel@tonic-gate if (ip_mask_to_plen_v6(&ire->ire_mask_v6) == IPV6_ABITS) { 8870Sstevel@tonic-gate ire_t *lire; 8880Sstevel@tonic-gate lire = ire_ctable_lookup_v6(&ire->ire_addr_v6, NULL, 8893448Sdh155122 IRE_CACHE, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, 8903448Sdh155122 ipst); 8910Sstevel@tonic-gate if (lire != NULL) { 8920Sstevel@tonic-gate ire_refrele(lire); 8930Sstevel@tonic-gate ire_flush_cache_v6(ire, IRE_FLUSH_ADD); 8940Sstevel@tonic-gate } 8950Sstevel@tonic-gate } else { 8960Sstevel@tonic-gate ire_flush_cache_v6(ire, IRE_FLUSH_ADD); 8970Sstevel@tonic-gate } 8980Sstevel@tonic-gate } 8990Sstevel@tonic-gate 9000Sstevel@tonic-gate *ire_p = ire; 9010Sstevel@tonic-gate return (0); 9020Sstevel@tonic-gate } 9030Sstevel@tonic-gate 9040Sstevel@tonic-gate /* 9050Sstevel@tonic-gate * Search for all HOST REDIRECT routes that are 9060Sstevel@tonic-gate * pointing at the specified gateway and 9070Sstevel@tonic-gate * delete them. This routine is called only 9080Sstevel@tonic-gate * when a default gateway is going away. 9090Sstevel@tonic-gate */ 9100Sstevel@tonic-gate static void 9113448Sdh155122 ire_delete_host_redirects_v6(const in6_addr_t *gateway, ip_stack_t *ipst) 9120Sstevel@tonic-gate { 9130Sstevel@tonic-gate irb_t *irb_ptr; 9140Sstevel@tonic-gate irb_t *irb; 9150Sstevel@tonic-gate ire_t *ire; 9160Sstevel@tonic-gate in6_addr_t gw_addr_v6; 9170Sstevel@tonic-gate int i; 9180Sstevel@tonic-gate 9190Sstevel@tonic-gate /* get the hash table for HOST routes */ 9203448Sdh155122 irb_ptr = ipst->ips_ip_forwarding_table_v6[(IP6_MASK_TABLE_SIZE - 1)]; 9210Sstevel@tonic-gate if (irb_ptr == NULL) 9220Sstevel@tonic-gate return; 9233448Sdh155122 for (i = 0; (i < ipst->ips_ip6_ftable_hash_size); i++) { 9240Sstevel@tonic-gate irb = &irb_ptr[i]; 9250Sstevel@tonic-gate IRB_REFHOLD(irb); 9260Sstevel@tonic-gate for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 9273004Sdd193516 if (!(ire->ire_flags & RTF_DYNAMIC)) 9280Sstevel@tonic-gate continue; 9290Sstevel@tonic-gate mutex_enter(&ire->ire_lock); 9300Sstevel@tonic-gate gw_addr_v6 = ire->ire_gateway_addr_v6; 9310Sstevel@tonic-gate mutex_exit(&ire->ire_lock); 9320Sstevel@tonic-gate if (IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) 9330Sstevel@tonic-gate ire_delete(ire); 9340Sstevel@tonic-gate } 9350Sstevel@tonic-gate IRB_REFRELE(irb); 9360Sstevel@tonic-gate } 9370Sstevel@tonic-gate } 9380Sstevel@tonic-gate 9390Sstevel@tonic-gate /* 9400Sstevel@tonic-gate * Delete all the cache entries with this 'addr'. This is the IPv6 counterpart 9410Sstevel@tonic-gate * of ip_ire_clookup_and_delete. The difference being this function does not 9420Sstevel@tonic-gate * return any value. IPv6 processing of a gratuitous ARP, as it stands, is 9430Sstevel@tonic-gate * different than IPv4 in that, regardless of the presence of a cache entry 9440Sstevel@tonic-gate * for this address, an ire_walk_v6 is done. Another difference is that unlike 9450Sstevel@tonic-gate * in the case of IPv4 this does not take an ipif_t argument, since it is only 9460Sstevel@tonic-gate * called by ip_arp_news and the match is always only on the address. 9470Sstevel@tonic-gate */ 9480Sstevel@tonic-gate void 9493448Sdh155122 ip_ire_clookup_and_delete_v6(const in6_addr_t *addr, ip_stack_t *ipst) 9500Sstevel@tonic-gate { 9510Sstevel@tonic-gate irb_t *irb; 9520Sstevel@tonic-gate ire_t *cire; 9530Sstevel@tonic-gate boolean_t found = B_FALSE; 9540Sstevel@tonic-gate 9553448Sdh155122 irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 9564714Ssowmini ipst->ips_ip6_cache_table_size)]; 9570Sstevel@tonic-gate IRB_REFHOLD(irb); 9580Sstevel@tonic-gate for (cire = irb->irb_ire; cire != NULL; cire = cire->ire_next) { 9593448Sdh155122 if (cire->ire_marks & IRE_MARK_CONDEMNED) 9600Sstevel@tonic-gate continue; 9610Sstevel@tonic-gate if (IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, addr)) { 9620Sstevel@tonic-gate 9630Sstevel@tonic-gate /* This signifies start of a match */ 9640Sstevel@tonic-gate if (!found) 9650Sstevel@tonic-gate found = B_TRUE; 9660Sstevel@tonic-gate if (cire->ire_type == IRE_CACHE) { 9670Sstevel@tonic-gate if (cire->ire_nce != NULL) 9680Sstevel@tonic-gate ndp_delete(cire->ire_nce); 9690Sstevel@tonic-gate ire_delete_v6(cire); 9700Sstevel@tonic-gate } 9710Sstevel@tonic-gate /* End of the match */ 9720Sstevel@tonic-gate } else if (found) 9730Sstevel@tonic-gate break; 9740Sstevel@tonic-gate } 9750Sstevel@tonic-gate IRB_REFRELE(irb); 9760Sstevel@tonic-gate } 9770Sstevel@tonic-gate 9780Sstevel@tonic-gate /* 9790Sstevel@tonic-gate * Delete the specified IRE. 9800Sstevel@tonic-gate * All calls should use ire_delete(). 9810Sstevel@tonic-gate * Sometimes called as writer though not required by this function. 9820Sstevel@tonic-gate * 9830Sstevel@tonic-gate * NOTE : This function is called only if the ire was added 9840Sstevel@tonic-gate * in the list. 9850Sstevel@tonic-gate */ 9860Sstevel@tonic-gate void 9870Sstevel@tonic-gate ire_delete_v6(ire_t *ire) 9880Sstevel@tonic-gate { 9890Sstevel@tonic-gate in6_addr_t gw_addr_v6; 9903448Sdh155122 ip_stack_t *ipst = ire->ire_ipst; 9910Sstevel@tonic-gate 9920Sstevel@tonic-gate ASSERT(ire->ire_refcnt >= 1); 9930Sstevel@tonic-gate ASSERT(ire->ire_ipversion == IPV6_VERSION); 9940Sstevel@tonic-gate 9950Sstevel@tonic-gate if (ire->ire_type != IRE_CACHE) 9960Sstevel@tonic-gate ire_flush_cache_v6(ire, IRE_FLUSH_DELETE); 9970Sstevel@tonic-gate if (ire->ire_type == IRE_DEFAULT) { 9980Sstevel@tonic-gate /* 9990Sstevel@tonic-gate * when a default gateway is going away 10000Sstevel@tonic-gate * delete all the host redirects pointing at that 10010Sstevel@tonic-gate * gateway. 10020Sstevel@tonic-gate */ 10030Sstevel@tonic-gate mutex_enter(&ire->ire_lock); 10040Sstevel@tonic-gate gw_addr_v6 = ire->ire_gateway_addr_v6; 10050Sstevel@tonic-gate mutex_exit(&ire->ire_lock); 10063448Sdh155122 ire_delete_host_redirects_v6(&gw_addr_v6, ipst); 10070Sstevel@tonic-gate } 10080Sstevel@tonic-gate } 10090Sstevel@tonic-gate 10100Sstevel@tonic-gate /* 10113004Sdd193516 * ire_walk routine to delete all IRE_CACHE and IRE_HOST type redirect 10120Sstevel@tonic-gate * entries. 10130Sstevel@tonic-gate */ 10140Sstevel@tonic-gate /*ARGSUSED1*/ 10150Sstevel@tonic-gate void 10160Sstevel@tonic-gate ire_delete_cache_v6(ire_t *ire, char *arg) 10170Sstevel@tonic-gate { 10180Sstevel@tonic-gate char addrstr1[INET6_ADDRSTRLEN]; 10190Sstevel@tonic-gate char addrstr2[INET6_ADDRSTRLEN]; 10200Sstevel@tonic-gate 10213004Sdd193516 if ((ire->ire_type & IRE_CACHE) || 10223004Sdd193516 (ire->ire_flags & RTF_DYNAMIC)) { 10230Sstevel@tonic-gate ip1dbg(("ire_delete_cache_v6: deleted %s type %d through %s\n", 10240Sstevel@tonic-gate inet_ntop(AF_INET6, &ire->ire_addr_v6, 10254714Ssowmini addrstr1, sizeof (addrstr1)), 10260Sstevel@tonic-gate ire->ire_type, 10270Sstevel@tonic-gate inet_ntop(AF_INET6, &ire->ire_gateway_addr_v6, 10284714Ssowmini addrstr2, sizeof (addrstr2)))); 10290Sstevel@tonic-gate ire_delete(ire); 10300Sstevel@tonic-gate } 10310Sstevel@tonic-gate 10320Sstevel@tonic-gate } 10330Sstevel@tonic-gate 10340Sstevel@tonic-gate /* 10353004Sdd193516 * ire_walk routine to delete all IRE_CACHE/IRE_HOST type redirect entries 10360Sstevel@tonic-gate * that have a given gateway address. 10370Sstevel@tonic-gate */ 10380Sstevel@tonic-gate void 10390Sstevel@tonic-gate ire_delete_cache_gw_v6(ire_t *ire, char *addr) 10400Sstevel@tonic-gate { 10410Sstevel@tonic-gate in6_addr_t *gw_addr = (in6_addr_t *)addr; 10420Sstevel@tonic-gate char buf1[INET6_ADDRSTRLEN]; 10430Sstevel@tonic-gate char buf2[INET6_ADDRSTRLEN]; 10440Sstevel@tonic-gate in6_addr_t ire_gw_addr_v6; 10450Sstevel@tonic-gate 10463004Sdd193516 if (!(ire->ire_type & IRE_CACHE) && 10473004Sdd193516 !(ire->ire_flags & RTF_DYNAMIC)) 10480Sstevel@tonic-gate return; 10490Sstevel@tonic-gate 10500Sstevel@tonic-gate mutex_enter(&ire->ire_lock); 10510Sstevel@tonic-gate ire_gw_addr_v6 = ire->ire_gateway_addr_v6; 10520Sstevel@tonic-gate mutex_exit(&ire->ire_lock); 10530Sstevel@tonic-gate 10540Sstevel@tonic-gate if (IN6_ARE_ADDR_EQUAL(&ire_gw_addr_v6, gw_addr)) { 10550Sstevel@tonic-gate ip1dbg(("ire_delete_cache_gw_v6: deleted %s type %d to %s\n", 10560Sstevel@tonic-gate inet_ntop(AF_INET6, &ire->ire_src_addr_v6, 10570Sstevel@tonic-gate buf1, sizeof (buf1)), 10580Sstevel@tonic-gate ire->ire_type, 10590Sstevel@tonic-gate inet_ntop(AF_INET6, &ire_gw_addr_v6, 10600Sstevel@tonic-gate buf2, sizeof (buf2)))); 10610Sstevel@tonic-gate ire_delete(ire); 10620Sstevel@tonic-gate } 10630Sstevel@tonic-gate } 10640Sstevel@tonic-gate 10650Sstevel@tonic-gate /* 10660Sstevel@tonic-gate * Remove all IRE_CACHE entries that match 10670Sstevel@tonic-gate * the ire specified. (Sometimes called 10680Sstevel@tonic-gate * as writer though not required by this function.) 10690Sstevel@tonic-gate * 10700Sstevel@tonic-gate * The flag argument indicates if the 10710Sstevel@tonic-gate * flush request is due to addition 10720Sstevel@tonic-gate * of new route (IRE_FLUSH_ADD) or deletion of old 10730Sstevel@tonic-gate * route (IRE_FLUSH_DELETE). 10740Sstevel@tonic-gate * 10750Sstevel@tonic-gate * This routine takes only the IREs from the forwarding 10760Sstevel@tonic-gate * table and flushes the corresponding entries from 10770Sstevel@tonic-gate * the cache table. 10780Sstevel@tonic-gate * 10790Sstevel@tonic-gate * When flushing due to the deletion of an old route, it 10800Sstevel@tonic-gate * just checks the cache handles (ire_phandle and ire_ihandle) and 10810Sstevel@tonic-gate * deletes the ones that match. 10820Sstevel@tonic-gate * 10830Sstevel@tonic-gate * When flushing due to the creation of a new route, it checks 10840Sstevel@tonic-gate * if a cache entry's address matches the one in the IRE and 10850Sstevel@tonic-gate * that the cache entry's parent has a less specific mask than the 10860Sstevel@tonic-gate * one in IRE. The destination of such a cache entry could be the 10870Sstevel@tonic-gate * gateway for other cache entries, so we need to flush those as 10880Sstevel@tonic-gate * well by looking for gateway addresses matching the IRE's address. 10890Sstevel@tonic-gate */ 10900Sstevel@tonic-gate void 10910Sstevel@tonic-gate ire_flush_cache_v6(ire_t *ire, int flag) 10920Sstevel@tonic-gate { 10930Sstevel@tonic-gate int i; 10940Sstevel@tonic-gate ire_t *cire; 10950Sstevel@tonic-gate irb_t *irb; 10963448Sdh155122 ip_stack_t *ipst = ire->ire_ipst; 10970Sstevel@tonic-gate 10980Sstevel@tonic-gate if (ire->ire_type & IRE_CACHE) 10994714Ssowmini return; 11000Sstevel@tonic-gate 11010Sstevel@tonic-gate /* 11020Sstevel@tonic-gate * If a default is just created, there is no point 11030Sstevel@tonic-gate * in going through the cache, as there will not be any 11040Sstevel@tonic-gate * cached ires. 11050Sstevel@tonic-gate */ 11060Sstevel@tonic-gate if (ire->ire_type == IRE_DEFAULT && flag == IRE_FLUSH_ADD) 11070Sstevel@tonic-gate return; 11080Sstevel@tonic-gate if (flag == IRE_FLUSH_ADD) { 11090Sstevel@tonic-gate /* 11100Sstevel@tonic-gate * This selective flush is 11110Sstevel@tonic-gate * due to the addition of 11120Sstevel@tonic-gate * new IRE. 11130Sstevel@tonic-gate */ 11143448Sdh155122 for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { 11153448Sdh155122 irb = &ipst->ips_ip_cache_table_v6[i]; 11160Sstevel@tonic-gate if ((cire = irb->irb_ire) == NULL) 11170Sstevel@tonic-gate continue; 11180Sstevel@tonic-gate IRB_REFHOLD(irb); 11190Sstevel@tonic-gate for (cire = irb->irb_ire; cire != NULL; 11200Sstevel@tonic-gate cire = cire->ire_next) { 11210Sstevel@tonic-gate if (cire->ire_type != IRE_CACHE) 11220Sstevel@tonic-gate continue; 11230Sstevel@tonic-gate /* 11240Sstevel@tonic-gate * If 'cire' belongs to the same subnet 11250Sstevel@tonic-gate * as the new ire being added, and 'cire' 11260Sstevel@tonic-gate * is derived from a prefix that is less 11270Sstevel@tonic-gate * specific than the new ire being added, 11280Sstevel@tonic-gate * we need to flush 'cire'; for instance, 11290Sstevel@tonic-gate * when a new interface comes up. 11300Sstevel@tonic-gate */ 11310Sstevel@tonic-gate if ((V6_MASK_EQ_2(cire->ire_addr_v6, 11320Sstevel@tonic-gate ire->ire_mask_v6, ire->ire_addr_v6) && 11330Sstevel@tonic-gate (ip_mask_to_plen_v6(&cire->ire_cmask_v6) <= 11340Sstevel@tonic-gate ire->ire_masklen))) { 11350Sstevel@tonic-gate ire_delete(cire); 11360Sstevel@tonic-gate continue; 11370Sstevel@tonic-gate } 11380Sstevel@tonic-gate /* 11390Sstevel@tonic-gate * This is the case when the ire_gateway_addr 11400Sstevel@tonic-gate * of 'cire' belongs to the same subnet as 11410Sstevel@tonic-gate * the new ire being added. 11420Sstevel@tonic-gate * Flushing such ires is sometimes required to 11430Sstevel@tonic-gate * avoid misrouting: say we have a machine with 11440Sstevel@tonic-gate * two interfaces (I1 and I2), a default router 11450Sstevel@tonic-gate * R on the I1 subnet, and a host route to an 11460Sstevel@tonic-gate * off-link destination D with a gateway G on 11470Sstevel@tonic-gate * the I2 subnet. 11480Sstevel@tonic-gate * Under normal operation, we will have an 11490Sstevel@tonic-gate * on-link cache entry for G and an off-link 11500Sstevel@tonic-gate * cache entry for D with G as ire_gateway_addr, 11510Sstevel@tonic-gate * traffic to D will reach its destination 11520Sstevel@tonic-gate * through gateway G. 11530Sstevel@tonic-gate * If the administrator does 'ifconfig I2 down', 11540Sstevel@tonic-gate * the cache entries for D and G will be 11550Sstevel@tonic-gate * flushed. However, G will now be resolved as 11560Sstevel@tonic-gate * an off-link destination using R (the default 11570Sstevel@tonic-gate * router) as gateway. Then D will also be 11580Sstevel@tonic-gate * resolved as an off-link destination using G 11590Sstevel@tonic-gate * as gateway - this behavior is due to 11600Sstevel@tonic-gate * compatibility reasons, see comment in 11610Sstevel@tonic-gate * ire_ihandle_lookup_offlink(). Traffic to D 11620Sstevel@tonic-gate * will go to the router R and probably won't 11630Sstevel@tonic-gate * reach the destination. 11640Sstevel@tonic-gate * The administrator then does 'ifconfig I2 up'. 11650Sstevel@tonic-gate * Since G is on the I2 subnet, this routine 11660Sstevel@tonic-gate * will flush its cache entry. It must also 11670Sstevel@tonic-gate * flush the cache entry for D, otherwise 11680Sstevel@tonic-gate * traffic will stay misrouted until the IRE 11690Sstevel@tonic-gate * times out. 11700Sstevel@tonic-gate */ 11710Sstevel@tonic-gate if (V6_MASK_EQ_2(cire->ire_gateway_addr_v6, 11720Sstevel@tonic-gate ire->ire_mask_v6, ire->ire_addr_v6)) { 11730Sstevel@tonic-gate ire_delete(cire); 11740Sstevel@tonic-gate continue; 11750Sstevel@tonic-gate } 11760Sstevel@tonic-gate } 11770Sstevel@tonic-gate IRB_REFRELE(irb); 11780Sstevel@tonic-gate } 11790Sstevel@tonic-gate } else { 11800Sstevel@tonic-gate /* 11810Sstevel@tonic-gate * delete the cache entries based on 11820Sstevel@tonic-gate * handle in the IRE as this IRE is 11830Sstevel@tonic-gate * being deleted/changed. 11840Sstevel@tonic-gate */ 11853448Sdh155122 for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { 11863448Sdh155122 irb = &ipst->ips_ip_cache_table_v6[i]; 11870Sstevel@tonic-gate if ((cire = irb->irb_ire) == NULL) 11880Sstevel@tonic-gate continue; 11890Sstevel@tonic-gate IRB_REFHOLD(irb); 11900Sstevel@tonic-gate for (cire = irb->irb_ire; cire != NULL; 11910Sstevel@tonic-gate cire = cire->ire_next) { 11920Sstevel@tonic-gate if (cire->ire_type != IRE_CACHE) 11930Sstevel@tonic-gate continue; 11940Sstevel@tonic-gate if ((cire->ire_phandle == 0 || 11950Sstevel@tonic-gate cire->ire_phandle != ire->ire_phandle) && 11960Sstevel@tonic-gate (cire->ire_ihandle == 0 || 11970Sstevel@tonic-gate cire->ire_ihandle != ire->ire_ihandle)) 11980Sstevel@tonic-gate continue; 11990Sstevel@tonic-gate ire_delete(cire); 12000Sstevel@tonic-gate } 12010Sstevel@tonic-gate IRB_REFRELE(irb); 12020Sstevel@tonic-gate } 12030Sstevel@tonic-gate } 12040Sstevel@tonic-gate } 12050Sstevel@tonic-gate 12060Sstevel@tonic-gate /* 12070Sstevel@tonic-gate * Matches the arguments passed with the values in the ire. 12080Sstevel@tonic-gate * 12090Sstevel@tonic-gate * Note: for match types that match using "ipif" passed in, ipif 12100Sstevel@tonic-gate * must be checked for non-NULL before calling this routine. 12110Sstevel@tonic-gate */ 12120Sstevel@tonic-gate static boolean_t 12130Sstevel@tonic-gate ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, const in6_addr_t *mask, 12141676Sjpk const in6_addr_t *gateway, int type, const ipif_t *ipif, zoneid_t zoneid, 12151676Sjpk uint32_t ihandle, const ts_label_t *tsl, int match_flags) 12160Sstevel@tonic-gate { 12170Sstevel@tonic-gate in6_addr_t masked_addr; 12180Sstevel@tonic-gate in6_addr_t gw_addr_v6; 12190Sstevel@tonic-gate ill_t *ire_ill = NULL, *dst_ill; 12200Sstevel@tonic-gate ill_t *ipif_ill = NULL; 12210Sstevel@tonic-gate ill_group_t *ire_ill_group = NULL; 12220Sstevel@tonic-gate ill_group_t *ipif_ill_group = NULL; 12230Sstevel@tonic-gate ipif_t *src_ipif; 12240Sstevel@tonic-gate 12250Sstevel@tonic-gate ASSERT(ire->ire_ipversion == IPV6_VERSION); 12260Sstevel@tonic-gate ASSERT(addr != NULL); 12270Sstevel@tonic-gate ASSERT(mask != NULL); 12280Sstevel@tonic-gate ASSERT((!(match_flags & MATCH_IRE_GW)) || gateway != NULL); 12290Sstevel@tonic-gate ASSERT((!(match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP))) || 12300Sstevel@tonic-gate (ipif != NULL && ipif->ipif_isv6)); 12310Sstevel@tonic-gate 12320Sstevel@tonic-gate /* 12330Sstevel@tonic-gate * HIDDEN cache entries have to be looked up specifically with 12340Sstevel@tonic-gate * MATCH_IRE_MARK_HIDDEN. MATCH_IRE_MARK_HIDDEN is usually set 12350Sstevel@tonic-gate * when the interface is FAILED or INACTIVE. In that case, 12360Sstevel@tonic-gate * any IRE_CACHES that exists should be marked with 12370Sstevel@tonic-gate * IRE_MARK_HIDDEN. So, we don't really need to match below 12380Sstevel@tonic-gate * for IRE_MARK_HIDDEN. But we do so for consistency. 12390Sstevel@tonic-gate */ 12400Sstevel@tonic-gate if (!(match_flags & MATCH_IRE_MARK_HIDDEN) && 12410Sstevel@tonic-gate (ire->ire_marks & IRE_MARK_HIDDEN)) 12420Sstevel@tonic-gate return (B_FALSE); 12430Sstevel@tonic-gate 12441676Sjpk if (zoneid != ALL_ZONES && zoneid != ire->ire_zoneid && 12451676Sjpk ire->ire_zoneid != ALL_ZONES) { 12460Sstevel@tonic-gate /* 12470Sstevel@tonic-gate * If MATCH_IRE_ZONEONLY has been set and the supplied zoneid is 12480Sstevel@tonic-gate * valid and does not match that of ire_zoneid, a failure to 12490Sstevel@tonic-gate * match is reported at this point. Otherwise, since some IREs 12500Sstevel@tonic-gate * that are available in the global zone can be used in local 12510Sstevel@tonic-gate * zones, additional checks need to be performed: 12520Sstevel@tonic-gate * 12530Sstevel@tonic-gate * IRE_CACHE and IRE_LOOPBACK entries should 12540Sstevel@tonic-gate * never be matched in this situation. 12550Sstevel@tonic-gate * 12560Sstevel@tonic-gate * IRE entries that have an interface associated with them 12570Sstevel@tonic-gate * should in general not match unless they are an IRE_LOCAL 12580Sstevel@tonic-gate * or in the case when MATCH_IRE_DEFAULT has been set in 12590Sstevel@tonic-gate * the caller. In the case of the former, checking of the 12600Sstevel@tonic-gate * other fields supplied should take place. 12610Sstevel@tonic-gate * 12620Sstevel@tonic-gate * In the case where MATCH_IRE_DEFAULT has been set, 12630Sstevel@tonic-gate * all of the ipif's associated with the IRE's ill are 12640Sstevel@tonic-gate * checked to see if there is a matching zoneid. If any 12650Sstevel@tonic-gate * one ipif has a matching zoneid, this IRE is a 12660Sstevel@tonic-gate * potential candidate so checking of the other fields 12670Sstevel@tonic-gate * takes place. 12680Sstevel@tonic-gate * 12690Sstevel@tonic-gate * In the case where the IRE_INTERFACE has a usable source 12700Sstevel@tonic-gate * address (indicated by ill_usesrc_ifindex) in the 12710Sstevel@tonic-gate * correct zone then it's permitted to return this IRE 12720Sstevel@tonic-gate */ 12730Sstevel@tonic-gate if (match_flags & MATCH_IRE_ZONEONLY) 12740Sstevel@tonic-gate return (B_FALSE); 12750Sstevel@tonic-gate if (ire->ire_type & (IRE_CACHE | IRE_LOOPBACK)) 12760Sstevel@tonic-gate return (B_FALSE); 12770Sstevel@tonic-gate /* 12780Sstevel@tonic-gate * Note, IRE_INTERFACE can have the stq as NULL. For 12790Sstevel@tonic-gate * example, if the default multicast route is tied to 12800Sstevel@tonic-gate * the loopback address. 12810Sstevel@tonic-gate */ 12820Sstevel@tonic-gate if ((ire->ire_type & IRE_INTERFACE) && 12830Sstevel@tonic-gate (ire->ire_stq != NULL)) { 12840Sstevel@tonic-gate dst_ill = (ill_t *)ire->ire_stq->q_ptr; 12850Sstevel@tonic-gate /* 12860Sstevel@tonic-gate * If there is a usable source address in the 12870Sstevel@tonic-gate * zone, then it's ok to return an 12880Sstevel@tonic-gate * IRE_INTERFACE 12890Sstevel@tonic-gate */ 12900Sstevel@tonic-gate if ((dst_ill->ill_usesrc_ifindex != 0) && 12910Sstevel@tonic-gate (src_ipif = ipif_select_source_v6(dst_ill, addr, 12922202Srk129064 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid)) 12930Sstevel@tonic-gate != NULL) { 12940Sstevel@tonic-gate ip3dbg(("ire_match_args: src_ipif %p" 12950Sstevel@tonic-gate " dst_ill %p", (void *)src_ipif, 12960Sstevel@tonic-gate (void *)dst_ill)); 12970Sstevel@tonic-gate ipif_refrele(src_ipif); 12980Sstevel@tonic-gate } else { 12990Sstevel@tonic-gate ip3dbg(("ire_match_args: src_ipif NULL" 13000Sstevel@tonic-gate " dst_ill %p\n", (void *)dst_ill)); 13010Sstevel@tonic-gate return (B_FALSE); 13020Sstevel@tonic-gate } 13030Sstevel@tonic-gate } 13040Sstevel@tonic-gate if (ire->ire_ipif != NULL && ire->ire_type != IRE_LOCAL && 13050Sstevel@tonic-gate !(ire->ire_type & IRE_INTERFACE)) { 13060Sstevel@tonic-gate ipif_t *tipif; 13070Sstevel@tonic-gate 13080Sstevel@tonic-gate if ((match_flags & MATCH_IRE_DEFAULT) == 0) 13090Sstevel@tonic-gate return (B_FALSE); 13100Sstevel@tonic-gate mutex_enter(&ire->ire_ipif->ipif_ill->ill_lock); 13110Sstevel@tonic-gate for (tipif = ire->ire_ipif->ipif_ill->ill_ipif; 13120Sstevel@tonic-gate tipif != NULL; tipif = tipif->ipif_next) { 13130Sstevel@tonic-gate if (IPIF_CAN_LOOKUP(tipif) && 13140Sstevel@tonic-gate (tipif->ipif_flags & IPIF_UP) && 13151676Sjpk (tipif->ipif_zoneid == zoneid || 13161676Sjpk tipif->ipif_zoneid == ALL_ZONES)) 13170Sstevel@tonic-gate break; 13180Sstevel@tonic-gate } 13190Sstevel@tonic-gate mutex_exit(&ire->ire_ipif->ipif_ill->ill_lock); 13200Sstevel@tonic-gate if (tipif == NULL) 13210Sstevel@tonic-gate return (B_FALSE); 13220Sstevel@tonic-gate } 13230Sstevel@tonic-gate } 13240Sstevel@tonic-gate 13250Sstevel@tonic-gate if (match_flags & MATCH_IRE_GW) { 13260Sstevel@tonic-gate mutex_enter(&ire->ire_lock); 13270Sstevel@tonic-gate gw_addr_v6 = ire->ire_gateway_addr_v6; 13280Sstevel@tonic-gate mutex_exit(&ire->ire_lock); 13290Sstevel@tonic-gate } 13300Sstevel@tonic-gate /* 13310Sstevel@tonic-gate * For IRE_CACHES, MATCH_IRE_ILL/ILL_GROUP really means that 13320Sstevel@tonic-gate * somebody wants to send out on a particular interface which 13330Sstevel@tonic-gate * is given by ire_stq and hence use ire_stq to derive the ill 13340Sstevel@tonic-gate * value. ire_ipif for IRE_CACHES is just the 13350Sstevel@tonic-gate * means of getting a source address i.e ire_src_addr_v6 = 13360Sstevel@tonic-gate * ire->ire_ipif->ipif_src_addr_v6. 13370Sstevel@tonic-gate */ 13380Sstevel@tonic-gate if (match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP)) { 13390Sstevel@tonic-gate ire_ill = ire_to_ill(ire); 13400Sstevel@tonic-gate if (ire_ill != NULL) 13410Sstevel@tonic-gate ire_ill_group = ire_ill->ill_group; 13420Sstevel@tonic-gate ipif_ill = ipif->ipif_ill; 13430Sstevel@tonic-gate ipif_ill_group = ipif_ill->ill_group; 13440Sstevel@tonic-gate } 13450Sstevel@tonic-gate 13460Sstevel@tonic-gate /* No ire_addr_v6 bits set past the mask */ 13470Sstevel@tonic-gate ASSERT(V6_MASK_EQ(ire->ire_addr_v6, ire->ire_mask_v6, 13480Sstevel@tonic-gate ire->ire_addr_v6)); 13490Sstevel@tonic-gate V6_MASK_COPY(*addr, *mask, masked_addr); 13500Sstevel@tonic-gate 13510Sstevel@tonic-gate if (V6_MASK_EQ(*addr, *mask, ire->ire_addr_v6) && 13520Sstevel@tonic-gate ((!(match_flags & MATCH_IRE_GW)) || 13534714Ssowmini IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) && 13540Sstevel@tonic-gate ((!(match_flags & MATCH_IRE_TYPE)) || 13554714Ssowmini (ire->ire_type & type)) && 13560Sstevel@tonic-gate ((!(match_flags & MATCH_IRE_SRC)) || 13574714Ssowmini IN6_ARE_ADDR_EQUAL(&ire->ire_src_addr_v6, 13584714Ssowmini &ipif->ipif_v6src_addr)) && 13590Sstevel@tonic-gate ((!(match_flags & MATCH_IRE_IPIF)) || 13604714Ssowmini (ire->ire_ipif == ipif)) && 13610Sstevel@tonic-gate ((!(match_flags & MATCH_IRE_MARK_HIDDEN)) || 13624714Ssowmini (ire->ire_type != IRE_CACHE || 13634714Ssowmini ire->ire_marks & IRE_MARK_HIDDEN)) && 13640Sstevel@tonic-gate ((!(match_flags & MATCH_IRE_ILL)) || 13654714Ssowmini (ire_ill == ipif_ill)) && 13660Sstevel@tonic-gate ((!(match_flags & MATCH_IRE_IHANDLE)) || 13674714Ssowmini (ire->ire_ihandle == ihandle)) && 13680Sstevel@tonic-gate ((!(match_flags & MATCH_IRE_ILL_GROUP)) || 13694714Ssowmini (ire_ill == ipif_ill) || 13704714Ssowmini (ire_ill_group != NULL && 13714714Ssowmini ire_ill_group == ipif_ill_group)) && 13721676Sjpk ((!(match_flags & MATCH_IRE_SECATTR)) || 13734714Ssowmini (!is_system_labeled()) || 13744714Ssowmini (tsol_ire_match_gwattr(ire, tsl) == 0))) { 13750Sstevel@tonic-gate /* We found the matched IRE */ 13760Sstevel@tonic-gate return (B_TRUE); 13770Sstevel@tonic-gate } 13780Sstevel@tonic-gate return (B_FALSE); 13790Sstevel@tonic-gate } 13800Sstevel@tonic-gate 13810Sstevel@tonic-gate /* 13820Sstevel@tonic-gate * Lookup for a route in all the tables 13830Sstevel@tonic-gate */ 13840Sstevel@tonic-gate ire_t * 13850Sstevel@tonic-gate ire_route_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, 13861676Sjpk const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire, 13873448Sdh155122 zoneid_t zoneid, const ts_label_t *tsl, int flags, ip_stack_t *ipst) 13880Sstevel@tonic-gate { 13890Sstevel@tonic-gate ire_t *ire = NULL; 13900Sstevel@tonic-gate 13910Sstevel@tonic-gate /* 13920Sstevel@tonic-gate * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or 13930Sstevel@tonic-gate * MATCH_IRE_ILL is set. 13940Sstevel@tonic-gate */ 13950Sstevel@tonic-gate if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) && 13960Sstevel@tonic-gate (ipif == NULL)) 13970Sstevel@tonic-gate return (NULL); 13980Sstevel@tonic-gate 13990Sstevel@tonic-gate /* 14000Sstevel@tonic-gate * might be asking for a cache lookup, 14010Sstevel@tonic-gate * This is not best way to lookup cache, 14020Sstevel@tonic-gate * user should call ire_cache_lookup directly. 14030Sstevel@tonic-gate * 14040Sstevel@tonic-gate * If MATCH_IRE_TYPE was set, first lookup in the cache table and then 14050Sstevel@tonic-gate * in the forwarding table, if the applicable type flags were set. 14060Sstevel@tonic-gate */ 14070Sstevel@tonic-gate if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_CACHETABLE) != 0) { 14080Sstevel@tonic-gate ire = ire_ctable_lookup_v6(addr, gateway, type, ipif, zoneid, 14093448Sdh155122 tsl, flags, ipst); 14100Sstevel@tonic-gate if (ire != NULL) 14110Sstevel@tonic-gate return (ire); 14120Sstevel@tonic-gate } 14130Sstevel@tonic-gate if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_FORWARDTABLE) != 0) { 14140Sstevel@tonic-gate ire = ire_ftable_lookup_v6(addr, mask, gateway, type, ipif, 14153448Sdh155122 pire, zoneid, 0, tsl, flags, ipst); 14160Sstevel@tonic-gate } 14170Sstevel@tonic-gate return (ire); 14180Sstevel@tonic-gate } 14190Sstevel@tonic-gate 14200Sstevel@tonic-gate /* 14210Sstevel@tonic-gate * Lookup a route in forwarding table. 14220Sstevel@tonic-gate * specific lookup is indicated by passing the 14230Sstevel@tonic-gate * required parameters and indicating the 14240Sstevel@tonic-gate * match required in flag field. 14250Sstevel@tonic-gate * 14260Sstevel@tonic-gate * Looking for default route can be done in three ways 14270Sstevel@tonic-gate * 1) pass mask as ipv6_all_zeros and set MATCH_IRE_MASK in flags field 14280Sstevel@tonic-gate * along with other matches. 14290Sstevel@tonic-gate * 2) pass type as IRE_DEFAULT and set MATCH_IRE_TYPE in flags 14300Sstevel@tonic-gate * field along with other matches. 14310Sstevel@tonic-gate * 3) if the destination and mask are passed as zeros. 14320Sstevel@tonic-gate * 14330Sstevel@tonic-gate * A request to return a default route if no route 14340Sstevel@tonic-gate * is found, can be specified by setting MATCH_IRE_DEFAULT 14350Sstevel@tonic-gate * in flags. 14360Sstevel@tonic-gate * 14370Sstevel@tonic-gate * It does not support recursion more than one level. It 14380Sstevel@tonic-gate * will do recursive lookup only when the lookup maps to 14390Sstevel@tonic-gate * a prefix or default route and MATCH_IRE_RECURSIVE flag is passed. 14400Sstevel@tonic-gate * 14410Sstevel@tonic-gate * If the routing table is setup to allow more than one level 14420Sstevel@tonic-gate * of recursion, the cleaning up cache table will not work resulting 14430Sstevel@tonic-gate * in invalid routing. 14440Sstevel@tonic-gate * 14450Sstevel@tonic-gate * Supports link-local addresses by following the ipif/ill when recursing. 14460Sstevel@tonic-gate * 14470Sstevel@tonic-gate * NOTE : When this function returns NULL, pire has already been released. 14480Sstevel@tonic-gate * pire is valid only when this function successfully returns an 14490Sstevel@tonic-gate * ire. 14500Sstevel@tonic-gate */ 14510Sstevel@tonic-gate ire_t * 14520Sstevel@tonic-gate ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, 14531676Sjpk const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire, 14543448Sdh155122 zoneid_t zoneid, uint32_t ihandle, const ts_label_t *tsl, int flags, 14553448Sdh155122 ip_stack_t *ipst) 14560Sstevel@tonic-gate { 14570Sstevel@tonic-gate irb_t *irb_ptr; 14580Sstevel@tonic-gate ire_t *rire; 14590Sstevel@tonic-gate ire_t *ire = NULL; 14600Sstevel@tonic-gate ire_t *saved_ire; 14610Sstevel@tonic-gate nce_t *nce; 14620Sstevel@tonic-gate int i; 14630Sstevel@tonic-gate in6_addr_t gw_addr_v6; 14640Sstevel@tonic-gate 14650Sstevel@tonic-gate ASSERT(addr != NULL); 14660Sstevel@tonic-gate ASSERT((!(flags & MATCH_IRE_MASK)) || mask != NULL); 14670Sstevel@tonic-gate ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL); 14680Sstevel@tonic-gate ASSERT(ipif == NULL || ipif->ipif_isv6); 14690Sstevel@tonic-gate 14700Sstevel@tonic-gate /* 14710Sstevel@tonic-gate * When we return NULL from this function, we should make 14720Sstevel@tonic-gate * sure that *pire is NULL so that the callers will not 14730Sstevel@tonic-gate * wrongly REFRELE the pire. 14740Sstevel@tonic-gate */ 14750Sstevel@tonic-gate if (pire != NULL) 14760Sstevel@tonic-gate *pire = NULL; 14770Sstevel@tonic-gate /* 14780Sstevel@tonic-gate * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or 14790Sstevel@tonic-gate * MATCH_IRE_ILL is set. 14800Sstevel@tonic-gate */ 14810Sstevel@tonic-gate if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) && 14820Sstevel@tonic-gate (ipif == NULL)) 14830Sstevel@tonic-gate return (NULL); 14840Sstevel@tonic-gate 14850Sstevel@tonic-gate /* 14860Sstevel@tonic-gate * If the mask is known, the lookup 14870Sstevel@tonic-gate * is simple, if the mask is not known 14880Sstevel@tonic-gate * we need to search. 14890Sstevel@tonic-gate */ 14900Sstevel@tonic-gate if (flags & MATCH_IRE_MASK) { 14910Sstevel@tonic-gate uint_t masklen; 14920Sstevel@tonic-gate 14930Sstevel@tonic-gate masklen = ip_mask_to_plen_v6(mask); 14943448Sdh155122 if (ipst->ips_ip_forwarding_table_v6[masklen] == NULL) 14950Sstevel@tonic-gate return (NULL); 14963448Sdh155122 irb_ptr = &(ipst->ips_ip_forwarding_table_v6[masklen][ 14973448Sdh155122 IRE_ADDR_MASK_HASH_V6(*addr, *mask, 14984714Ssowmini ipst->ips_ip6_ftable_hash_size)]); 14990Sstevel@tonic-gate rw_enter(&irb_ptr->irb_lock, RW_READER); 15000Sstevel@tonic-gate for (ire = irb_ptr->irb_ire; ire != NULL; 15010Sstevel@tonic-gate ire = ire->ire_next) { 15020Sstevel@tonic-gate if (ire->ire_marks & IRE_MARK_CONDEMNED) 15030Sstevel@tonic-gate continue; 15040Sstevel@tonic-gate if (ire_match_args_v6(ire, addr, mask, gateway, type, 15051676Sjpk ipif, zoneid, ihandle, tsl, flags)) 15060Sstevel@tonic-gate goto found_ire; 15070Sstevel@tonic-gate } 15080Sstevel@tonic-gate rw_exit(&irb_ptr->irb_lock); 15090Sstevel@tonic-gate } else { 15100Sstevel@tonic-gate /* 15110Sstevel@tonic-gate * In this case we don't know the mask, we need to 15120Sstevel@tonic-gate * search the table assuming different mask sizes. 15130Sstevel@tonic-gate * we start with 128 bit mask, we don't allow default here. 15140Sstevel@tonic-gate */ 15150Sstevel@tonic-gate for (i = (IP6_MASK_TABLE_SIZE - 1); i > 0; i--) { 15160Sstevel@tonic-gate in6_addr_t tmpmask; 15170Sstevel@tonic-gate 15183448Sdh155122 if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL) 15190Sstevel@tonic-gate continue; 15200Sstevel@tonic-gate (void) ip_plen_to_mask_v6(i, &tmpmask); 15213448Sdh155122 irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][ 15220Sstevel@tonic-gate IRE_ADDR_MASK_HASH_V6(*addr, tmpmask, 15233448Sdh155122 ipst->ips_ip6_ftable_hash_size)]; 15240Sstevel@tonic-gate rw_enter(&irb_ptr->irb_lock, RW_READER); 15250Sstevel@tonic-gate for (ire = irb_ptr->irb_ire; ire != NULL; 15260Sstevel@tonic-gate ire = ire->ire_next) { 15270Sstevel@tonic-gate if (ire->ire_marks & IRE_MARK_CONDEMNED) 15280Sstevel@tonic-gate continue; 15290Sstevel@tonic-gate if (ire_match_args_v6(ire, addr, 15300Sstevel@tonic-gate &ire->ire_mask_v6, gateway, type, ipif, 15311676Sjpk zoneid, ihandle, tsl, flags)) 15320Sstevel@tonic-gate goto found_ire; 15330Sstevel@tonic-gate } 15340Sstevel@tonic-gate rw_exit(&irb_ptr->irb_lock); 15350Sstevel@tonic-gate } 15360Sstevel@tonic-gate } 15370Sstevel@tonic-gate 15380Sstevel@tonic-gate /* 15390Sstevel@tonic-gate * We come here if no route has yet been found. 15400Sstevel@tonic-gate * 15410Sstevel@tonic-gate * Handle the case where default route is 15420Sstevel@tonic-gate * requested by specifying type as one of the possible 15430Sstevel@tonic-gate * types for that can have a zero mask (IRE_DEFAULT and IRE_INTERFACE). 15440Sstevel@tonic-gate * 15450Sstevel@tonic-gate * If MATCH_IRE_MASK is specified, then the appropriate default route 15460Sstevel@tonic-gate * would have been found above if it exists so it isn't looked up here. 15470Sstevel@tonic-gate * If MATCH_IRE_DEFAULT was also specified, then a default route will be 15480Sstevel@tonic-gate * searched for later. 15490Sstevel@tonic-gate */ 15500Sstevel@tonic-gate if ((flags & (MATCH_IRE_TYPE | MATCH_IRE_MASK)) == MATCH_IRE_TYPE && 15510Sstevel@tonic-gate (type & (IRE_DEFAULT | IRE_INTERFACE))) { 15523448Sdh155122 if (ipst->ips_ip_forwarding_table_v6[0] != NULL) { 15530Sstevel@tonic-gate /* addr & mask is zero for defaults */ 15543448Sdh155122 irb_ptr = &ipst->ips_ip_forwarding_table_v6[0][ 15550Sstevel@tonic-gate IRE_ADDR_HASH_V6(ipv6_all_zeros, 15563448Sdh155122 ipst->ips_ip6_ftable_hash_size)]; 15570Sstevel@tonic-gate rw_enter(&irb_ptr->irb_lock, RW_READER); 15580Sstevel@tonic-gate for (ire = irb_ptr->irb_ire; ire != NULL; 15590Sstevel@tonic-gate ire = ire->ire_next) { 15600Sstevel@tonic-gate 15610Sstevel@tonic-gate if (ire->ire_marks & IRE_MARK_CONDEMNED) 15620Sstevel@tonic-gate continue; 15630Sstevel@tonic-gate 15640Sstevel@tonic-gate if (ire_match_args_v6(ire, addr, 15650Sstevel@tonic-gate &ipv6_all_zeros, gateway, type, ipif, 15661676Sjpk zoneid, ihandle, tsl, flags)) 15670Sstevel@tonic-gate goto found_ire; 15680Sstevel@tonic-gate } 15690Sstevel@tonic-gate rw_exit(&irb_ptr->irb_lock); 15700Sstevel@tonic-gate } 15710Sstevel@tonic-gate } 15720Sstevel@tonic-gate /* 15730Sstevel@tonic-gate * We come here only if no route is found. 15740Sstevel@tonic-gate * see if the default route can be used which is allowed 15750Sstevel@tonic-gate * only if the default matching criteria is specified. 15760Sstevel@tonic-gate * The ipv6_ire_default_count tracks the number of IRE_DEFAULT 15770Sstevel@tonic-gate * entries. However, the ip_forwarding_table_v6[0] also contains 15780Sstevel@tonic-gate * interface routes thus the count can be zero. 15790Sstevel@tonic-gate */ 15800Sstevel@tonic-gate saved_ire = NULL; 15810Sstevel@tonic-gate if ((flags & (MATCH_IRE_DEFAULT | MATCH_IRE_MASK)) == 15820Sstevel@tonic-gate MATCH_IRE_DEFAULT) { 15830Sstevel@tonic-gate ire_t *ire_origin; 15840Sstevel@tonic-gate uint_t g_index; 15850Sstevel@tonic-gate uint_t index; 15860Sstevel@tonic-gate 15873448Sdh155122 if (ipst->ips_ip_forwarding_table_v6[0] == NULL) 15880Sstevel@tonic-gate return (NULL); 15893448Sdh155122 irb_ptr = &(ipst->ips_ip_forwarding_table_v6[0])[0]; 15900Sstevel@tonic-gate 15910Sstevel@tonic-gate /* 15920Sstevel@tonic-gate * Keep a tab on the bucket while looking the IRE_DEFAULT 15930Sstevel@tonic-gate * entries. We need to keep track of a particular IRE 15940Sstevel@tonic-gate * (ire_origin) so this ensures that it will not be unlinked 15950Sstevel@tonic-gate * from the hash list during the recursive lookup below. 15960Sstevel@tonic-gate */ 15970Sstevel@tonic-gate IRB_REFHOLD(irb_ptr); 15980Sstevel@tonic-gate ire = irb_ptr->irb_ire; 15990Sstevel@tonic-gate if (ire == NULL) { 16000Sstevel@tonic-gate IRB_REFRELE(irb_ptr); 16010Sstevel@tonic-gate return (NULL); 16020Sstevel@tonic-gate } 16030Sstevel@tonic-gate 16040Sstevel@tonic-gate /* 16050Sstevel@tonic-gate * Get the index first, since it can be changed by other 16060Sstevel@tonic-gate * threads. Then get to the right default route skipping 16070Sstevel@tonic-gate * default interface routes if any. As we hold a reference on 16080Sstevel@tonic-gate * the IRE bucket, ipv6_ire_default_count can only increase so 16090Sstevel@tonic-gate * we can't reach the end of the hash list unexpectedly. 16100Sstevel@tonic-gate */ 16113448Sdh155122 if (ipst->ips_ipv6_ire_default_count != 0) { 16123448Sdh155122 g_index = ipst->ips_ipv6_ire_default_index++; 16133448Sdh155122 index = g_index % ipst->ips_ipv6_ire_default_count; 16140Sstevel@tonic-gate while (index != 0) { 16150Sstevel@tonic-gate if (!(ire->ire_type & IRE_INTERFACE)) 16160Sstevel@tonic-gate index--; 16170Sstevel@tonic-gate ire = ire->ire_next; 16180Sstevel@tonic-gate } 16190Sstevel@tonic-gate ASSERT(ire != NULL); 16200Sstevel@tonic-gate } else { 16210Sstevel@tonic-gate /* 16220Sstevel@tonic-gate * No default route, so we only have default interface 16230Sstevel@tonic-gate * routes: don't enter the first loop. 16240Sstevel@tonic-gate */ 16250Sstevel@tonic-gate ire = NULL; 16260Sstevel@tonic-gate } 16270Sstevel@tonic-gate 16280Sstevel@tonic-gate /* 16290Sstevel@tonic-gate * Round-robin the default routers list looking for a neighbor 16300Sstevel@tonic-gate * that matches the passed in parameters and is reachable. If 16310Sstevel@tonic-gate * none found, just return a route from the default router list 16320Sstevel@tonic-gate * if it exists. If we can't find a default route (IRE_DEFAULT), 16330Sstevel@tonic-gate * look for interface default routes. 16340Sstevel@tonic-gate * We start with the ire we found above and we walk the hash 16350Sstevel@tonic-gate * list until we're back where we started, see 16360Sstevel@tonic-gate * ire_get_next_default_ire(). It doesn't matter if default 16370Sstevel@tonic-gate * routes are added or deleted by other threads - we know this 16380Sstevel@tonic-gate * ire will stay in the list because we hold a reference on the 16390Sstevel@tonic-gate * ire bucket. 16400Sstevel@tonic-gate * NB: if we only have interface default routes, ire is NULL so 16410Sstevel@tonic-gate * we don't even enter this loop (see above). 16420Sstevel@tonic-gate */ 16430Sstevel@tonic-gate ire_origin = ire; 16440Sstevel@tonic-gate for (; ire != NULL; 16450Sstevel@tonic-gate ire = ire_get_next_default_ire(ire, ire_origin)) { 16460Sstevel@tonic-gate 16470Sstevel@tonic-gate if (ire_match_args_v6(ire, addr, 16480Sstevel@tonic-gate &ipv6_all_zeros, gateway, type, ipif, 16491676Sjpk zoneid, ihandle, tsl, flags)) { 16500Sstevel@tonic-gate int match_flags; 16510Sstevel@tonic-gate 16520Sstevel@tonic-gate /* 16530Sstevel@tonic-gate * We have something to work with. 16540Sstevel@tonic-gate * If we can find a resolved/reachable 16550Sstevel@tonic-gate * entry, we will use this. Otherwise 16560Sstevel@tonic-gate * we'll try to find an entry that has 16570Sstevel@tonic-gate * a resolved cache entry. We will fallback 16580Sstevel@tonic-gate * on this if we don't find anything else. 16590Sstevel@tonic-gate */ 16600Sstevel@tonic-gate if (saved_ire == NULL) 16610Sstevel@tonic-gate saved_ire = ire; 16620Sstevel@tonic-gate mutex_enter(&ire->ire_lock); 16630Sstevel@tonic-gate gw_addr_v6 = ire->ire_gateway_addr_v6; 16640Sstevel@tonic-gate mutex_exit(&ire->ire_lock); 16651676Sjpk match_flags = MATCH_IRE_ILL_GROUP | 16661676Sjpk MATCH_IRE_SECATTR; 16670Sstevel@tonic-gate rire = ire_ctable_lookup_v6(&gw_addr_v6, NULL, 16683448Sdh155122 0, ire->ire_ipif, zoneid, tsl, match_flags, 16693448Sdh155122 ipst); 16700Sstevel@tonic-gate if (rire != NULL) { 16710Sstevel@tonic-gate nce = rire->ire_nce; 16720Sstevel@tonic-gate if (nce != NULL && 16730Sstevel@tonic-gate NCE_ISREACHABLE(nce) && 16740Sstevel@tonic-gate nce->nce_flags & NCE_F_ISROUTER) { 16750Sstevel@tonic-gate ire_refrele(rire); 16760Sstevel@tonic-gate IRE_REFHOLD(ire); 16770Sstevel@tonic-gate IRB_REFRELE(irb_ptr); 16780Sstevel@tonic-gate goto found_ire_held; 16790Sstevel@tonic-gate } else if (nce != NULL && 16800Sstevel@tonic-gate !(nce->nce_flags & 16810Sstevel@tonic-gate NCE_F_ISROUTER)) { 16820Sstevel@tonic-gate /* 16830Sstevel@tonic-gate * Make sure we don't use 16840Sstevel@tonic-gate * this ire 16850Sstevel@tonic-gate */ 16860Sstevel@tonic-gate if (saved_ire == ire) 16870Sstevel@tonic-gate saved_ire = NULL; 16880Sstevel@tonic-gate } 16890Sstevel@tonic-gate ire_refrele(rire); 16903448Sdh155122 } else if (ipst-> 16913448Sdh155122 ips_ipv6_ire_default_count > 1 && 16924333Snordmark zoneid != GLOBAL_ZONEID) { 16930Sstevel@tonic-gate /* 16940Sstevel@tonic-gate * When we're in a local zone, we're 16950Sstevel@tonic-gate * only interested in default routers 16960Sstevel@tonic-gate * that are reachable through ipifs 16970Sstevel@tonic-gate * within our zone. 16980Sstevel@tonic-gate * The potentially expensive call to 16990Sstevel@tonic-gate * ire_route_lookup_v6() is avoided when 17000Sstevel@tonic-gate * we have only one default route. 17010Sstevel@tonic-gate */ 17024333Snordmark int ire_match_flags = MATCH_IRE_TYPE | 17034333Snordmark MATCH_IRE_SECATTR; 17044333Snordmark 17054333Snordmark if (ire->ire_ipif != NULL) { 17064333Snordmark ire_match_flags |= 17074333Snordmark MATCH_IRE_ILL_GROUP; 17084333Snordmark } 17090Sstevel@tonic-gate rire = ire_route_lookup_v6(&gw_addr_v6, 17102733Snordmark NULL, NULL, IRE_INTERFACE, 17112733Snordmark ire->ire_ipif, NULL, 17124333Snordmark zoneid, tsl, ire_match_flags, ipst); 17130Sstevel@tonic-gate if (rire != NULL) { 17140Sstevel@tonic-gate ire_refrele(rire); 17150Sstevel@tonic-gate saved_ire = ire; 17160Sstevel@tonic-gate } else if (saved_ire == ire) { 17170Sstevel@tonic-gate /* 17180Sstevel@tonic-gate * Make sure we don't use 17190Sstevel@tonic-gate * this ire 17200Sstevel@tonic-gate */ 17210Sstevel@tonic-gate saved_ire = NULL; 17220Sstevel@tonic-gate } 17230Sstevel@tonic-gate } 17240Sstevel@tonic-gate } 17250Sstevel@tonic-gate } 17260Sstevel@tonic-gate if (saved_ire != NULL) { 17270Sstevel@tonic-gate ire = saved_ire; 17280Sstevel@tonic-gate IRE_REFHOLD(ire); 17290Sstevel@tonic-gate IRB_REFRELE(irb_ptr); 17300Sstevel@tonic-gate goto found_ire_held; 17310Sstevel@tonic-gate } else { 17320Sstevel@tonic-gate /* 17330Sstevel@tonic-gate * Look for a interface default route matching the 17340Sstevel@tonic-gate * args passed in. No round robin here. Just pick 17350Sstevel@tonic-gate * the right one. 17360Sstevel@tonic-gate */ 17370Sstevel@tonic-gate for (ire = irb_ptr->irb_ire; ire != NULL; 17380Sstevel@tonic-gate ire = ire->ire_next) { 17390Sstevel@tonic-gate 17400Sstevel@tonic-gate if (!(ire->ire_type & IRE_INTERFACE)) 17410Sstevel@tonic-gate continue; 17420Sstevel@tonic-gate 17430Sstevel@tonic-gate if (ire->ire_marks & IRE_MARK_CONDEMNED) 17440Sstevel@tonic-gate continue; 17450Sstevel@tonic-gate 17460Sstevel@tonic-gate if (ire_match_args_v6(ire, addr, 17470Sstevel@tonic-gate &ipv6_all_zeros, gateway, type, ipif, 17481676Sjpk zoneid, ihandle, tsl, flags)) { 17490Sstevel@tonic-gate IRE_REFHOLD(ire); 17500Sstevel@tonic-gate IRB_REFRELE(irb_ptr); 17510Sstevel@tonic-gate goto found_ire_held; 17520Sstevel@tonic-gate } 17530Sstevel@tonic-gate } 17540Sstevel@tonic-gate IRB_REFRELE(irb_ptr); 17550Sstevel@tonic-gate } 17560Sstevel@tonic-gate } 17570Sstevel@tonic-gate ASSERT(ire == NULL); 17580Sstevel@tonic-gate ip1dbg(("ire_ftable_lookup_v6: returning NULL ire")); 17590Sstevel@tonic-gate return (NULL); 17600Sstevel@tonic-gate found_ire: 17610Sstevel@tonic-gate ASSERT((ire->ire_marks & IRE_MARK_CONDEMNED) == 0); 17620Sstevel@tonic-gate IRE_REFHOLD(ire); 17630Sstevel@tonic-gate rw_exit(&irb_ptr->irb_lock); 17640Sstevel@tonic-gate 17650Sstevel@tonic-gate found_ire_held: 17660Sstevel@tonic-gate if ((flags & MATCH_IRE_RJ_BHOLE) && 17670Sstevel@tonic-gate (ire->ire_flags & (RTF_BLACKHOLE | RTF_REJECT))) { 17680Sstevel@tonic-gate return (ire); 17690Sstevel@tonic-gate } 17700Sstevel@tonic-gate /* 17710Sstevel@tonic-gate * At this point, IRE that was found must be an IRE_FORWARDTABLE 17720Sstevel@tonic-gate * or IRE_CACHETABLE type. If this is a recursive lookup and an 17730Sstevel@tonic-gate * IRE_INTERFACE type was found, return that. If it was some other 17740Sstevel@tonic-gate * IRE_FORWARDTABLE type of IRE (one of the prefix types), then it 17750Sstevel@tonic-gate * is necessary to fill in the parent IRE pointed to by pire, and 17760Sstevel@tonic-gate * then lookup the gateway address of the parent. For backwards 17770Sstevel@tonic-gate * compatiblity, if this lookup returns an 17780Sstevel@tonic-gate * IRE other than a IRE_CACHETABLE or IRE_INTERFACE, then one more level 17790Sstevel@tonic-gate * of lookup is done. 17800Sstevel@tonic-gate */ 17810Sstevel@tonic-gate if (flags & MATCH_IRE_RECURSIVE) { 17821676Sjpk const ipif_t *gw_ipif; 17830Sstevel@tonic-gate int match_flags = MATCH_IRE_DSTONLY; 17840Sstevel@tonic-gate 17850Sstevel@tonic-gate if (ire->ire_type & IRE_INTERFACE) 17860Sstevel@tonic-gate return (ire); 17870Sstevel@tonic-gate if (pire != NULL) 17880Sstevel@tonic-gate *pire = ire; 17890Sstevel@tonic-gate /* 17900Sstevel@tonic-gate * If we can't find an IRE_INTERFACE or the caller has not 17910Sstevel@tonic-gate * asked for pire, we need to REFRELE the saved_ire. 17920Sstevel@tonic-gate */ 17930Sstevel@tonic-gate saved_ire = ire; 17940Sstevel@tonic-gate 17950Sstevel@tonic-gate /* 17960Sstevel@tonic-gate * Currently MATCH_IRE_ILL is never used with 17970Sstevel@tonic-gate * (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT) while 17980Sstevel@tonic-gate * sending out packets as MATCH_IRE_ILL is used only 17990Sstevel@tonic-gate * for communicating with on-link hosts. We can't assert 18000Sstevel@tonic-gate * that here as RTM_GET calls this function with 18010Sstevel@tonic-gate * MATCH_IRE_ILL | MATCH_IRE_DEFAULT | MATCH_IRE_RECURSIVE. 18020Sstevel@tonic-gate * We have already used the MATCH_IRE_ILL in determining 18030Sstevel@tonic-gate * the right prefix route at this point. To match the 18040Sstevel@tonic-gate * behavior of how we locate routes while sending out 18050Sstevel@tonic-gate * packets, we don't want to use MATCH_IRE_ILL below 18060Sstevel@tonic-gate * while locating the interface route. 18070Sstevel@tonic-gate */ 18080Sstevel@tonic-gate if (ire->ire_ipif != NULL) 18090Sstevel@tonic-gate match_flags |= MATCH_IRE_ILL_GROUP; 18100Sstevel@tonic-gate 18110Sstevel@tonic-gate mutex_enter(&ire->ire_lock); 18120Sstevel@tonic-gate gw_addr_v6 = ire->ire_gateway_addr_v6; 18130Sstevel@tonic-gate mutex_exit(&ire->ire_lock); 18140Sstevel@tonic-gate 18150Sstevel@tonic-gate ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, 0, 18163448Sdh155122 ire->ire_ipif, NULL, zoneid, tsl, match_flags, ipst); 18170Sstevel@tonic-gate if (ire == NULL) { 18180Sstevel@tonic-gate /* 18190Sstevel@tonic-gate * In this case we have to deal with the 18200Sstevel@tonic-gate * MATCH_IRE_PARENT flag, which means the 18210Sstevel@tonic-gate * parent has to be returned if ire is NULL. 18220Sstevel@tonic-gate * The aim of this is to have (at least) a starting 18230Sstevel@tonic-gate * ire when we want to look at all of the ires in a 18240Sstevel@tonic-gate * bucket aimed at a single destination (as is the 18250Sstevel@tonic-gate * case in ip_newroute_v6 for the RTF_MULTIRT 18260Sstevel@tonic-gate * flagged routes). 18270Sstevel@tonic-gate */ 18280Sstevel@tonic-gate if (flags & MATCH_IRE_PARENT) { 18290Sstevel@tonic-gate if (pire != NULL) { 18300Sstevel@tonic-gate /* 18310Sstevel@tonic-gate * Need an extra REFHOLD, if the 18320Sstevel@tonic-gate * parent ire is returned via both 18330Sstevel@tonic-gate * ire and pire. 18340Sstevel@tonic-gate */ 18350Sstevel@tonic-gate IRE_REFHOLD(saved_ire); 18360Sstevel@tonic-gate } 18370Sstevel@tonic-gate ire = saved_ire; 18380Sstevel@tonic-gate } else { 18390Sstevel@tonic-gate ire_refrele(saved_ire); 18400Sstevel@tonic-gate if (pire != NULL) 18410Sstevel@tonic-gate *pire = NULL; 18420Sstevel@tonic-gate } 18430Sstevel@tonic-gate return (ire); 18440Sstevel@tonic-gate } 18450Sstevel@tonic-gate if (ire->ire_type & (IRE_CACHETABLE | IRE_INTERFACE)) { 18460Sstevel@tonic-gate /* 18470Sstevel@tonic-gate * If the caller did not ask for pire, release 18480Sstevel@tonic-gate * it now. 18490Sstevel@tonic-gate */ 18500Sstevel@tonic-gate if (pire == NULL) { 18510Sstevel@tonic-gate ire_refrele(saved_ire); 18520Sstevel@tonic-gate } 18530Sstevel@tonic-gate return (ire); 18540Sstevel@tonic-gate } 18550Sstevel@tonic-gate match_flags |= MATCH_IRE_TYPE; 18560Sstevel@tonic-gate mutex_enter(&ire->ire_lock); 18570Sstevel@tonic-gate gw_addr_v6 = ire->ire_gateway_addr_v6; 18580Sstevel@tonic-gate mutex_exit(&ire->ire_lock); 18590Sstevel@tonic-gate gw_ipif = ire->ire_ipif; 18600Sstevel@tonic-gate ire_refrele(ire); 18610Sstevel@tonic-gate ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, 18620Sstevel@tonic-gate (IRE_CACHETABLE | IRE_INTERFACE), gw_ipif, NULL, zoneid, 18633448Sdh155122 NULL, match_flags, ipst); 18640Sstevel@tonic-gate if (ire == NULL) { 18650Sstevel@tonic-gate /* 18660Sstevel@tonic-gate * In this case we have to deal with the 18670Sstevel@tonic-gate * MATCH_IRE_PARENT flag, which means the 18680Sstevel@tonic-gate * parent has to be returned if ire is NULL. 18690Sstevel@tonic-gate * The aim of this is to have (at least) a starting 18700Sstevel@tonic-gate * ire when we want to look at all of the ires in a 18710Sstevel@tonic-gate * bucket aimed at a single destination (as is the 18720Sstevel@tonic-gate * case in ip_newroute_v6 for the RTF_MULTIRT 18730Sstevel@tonic-gate * flagged routes). 18740Sstevel@tonic-gate */ 18750Sstevel@tonic-gate if (flags & MATCH_IRE_PARENT) { 18760Sstevel@tonic-gate if (pire != NULL) { 18770Sstevel@tonic-gate /* 18780Sstevel@tonic-gate * Need an extra REFHOLD, if the 18790Sstevel@tonic-gate * parent ire is returned via both 18800Sstevel@tonic-gate * ire and pire. 18810Sstevel@tonic-gate */ 18820Sstevel@tonic-gate IRE_REFHOLD(saved_ire); 18830Sstevel@tonic-gate } 18840Sstevel@tonic-gate ire = saved_ire; 18850Sstevel@tonic-gate } else { 18860Sstevel@tonic-gate ire_refrele(saved_ire); 18870Sstevel@tonic-gate if (pire != NULL) 18880Sstevel@tonic-gate *pire = NULL; 18890Sstevel@tonic-gate } 18900Sstevel@tonic-gate return (ire); 18910Sstevel@tonic-gate } else if (pire == NULL) { 18920Sstevel@tonic-gate /* 18930Sstevel@tonic-gate * If the caller did not ask for pire, release 18940Sstevel@tonic-gate * it now. 18950Sstevel@tonic-gate */ 18960Sstevel@tonic-gate ire_refrele(saved_ire); 18970Sstevel@tonic-gate } 18980Sstevel@tonic-gate return (ire); 18990Sstevel@tonic-gate } 19000Sstevel@tonic-gate 19010Sstevel@tonic-gate ASSERT(pire == NULL || *pire == NULL); 19020Sstevel@tonic-gate return (ire); 19030Sstevel@tonic-gate } 19040Sstevel@tonic-gate 19050Sstevel@tonic-gate /* 19061676Sjpk * Delete the IRE cache for the gateway and all IRE caches whose 19071676Sjpk * ire_gateway_addr_v6 points to this gateway, and allow them to 19081676Sjpk * be created on demand by ip_newroute_v6. 19091676Sjpk */ 19101676Sjpk void 19113448Sdh155122 ire_clookup_delete_cache_gw_v6(const in6_addr_t *addr, zoneid_t zoneid, 19123448Sdh155122 ip_stack_t *ipst) 19131676Sjpk { 19141676Sjpk irb_t *irb; 19151676Sjpk ire_t *ire; 19161676Sjpk 19173448Sdh155122 irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 19183448Sdh155122 ipst->ips_ip6_cache_table_size)]; 19191676Sjpk IRB_REFHOLD(irb); 19201676Sjpk for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 19211676Sjpk if (ire->ire_marks & IRE_MARK_CONDEMNED) 19221676Sjpk continue; 19231676Sjpk 19241676Sjpk ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones)); 19251676Sjpk if (ire_match_args_v6(ire, addr, &ire->ire_mask_v6, 0, 19261676Sjpk IRE_CACHE, NULL, zoneid, 0, NULL, MATCH_IRE_TYPE)) { 19271676Sjpk ire_delete(ire); 19281676Sjpk } 19291676Sjpk } 19301676Sjpk IRB_REFRELE(irb); 19311676Sjpk 19323448Sdh155122 ire_walk_v6(ire_delete_cache_gw_v6, (char *)addr, zoneid, ipst); 19331676Sjpk } 19341676Sjpk 19351676Sjpk /* 19360Sstevel@tonic-gate * Looks up cache table for a route. 19370Sstevel@tonic-gate * specific lookup can be indicated by 19380Sstevel@tonic-gate * passing the MATCH_* flags and the 19390Sstevel@tonic-gate * necessary parameters. 19400Sstevel@tonic-gate */ 19410Sstevel@tonic-gate ire_t * 19420Sstevel@tonic-gate ire_ctable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *gateway, 19431676Sjpk int type, const ipif_t *ipif, zoneid_t zoneid, const ts_label_t *tsl, 19443448Sdh155122 int flags, ip_stack_t *ipst) 19450Sstevel@tonic-gate { 19460Sstevel@tonic-gate ire_t *ire; 19470Sstevel@tonic-gate irb_t *irb_ptr; 19480Sstevel@tonic-gate ASSERT(addr != NULL); 19490Sstevel@tonic-gate ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL); 19500Sstevel@tonic-gate 19510Sstevel@tonic-gate /* 19520Sstevel@tonic-gate * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or 19530Sstevel@tonic-gate * MATCH_IRE_ILL is set. 19540Sstevel@tonic-gate */ 19550Sstevel@tonic-gate if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) && 19560Sstevel@tonic-gate (ipif == NULL)) 19570Sstevel@tonic-gate return (NULL); 19580Sstevel@tonic-gate 19593448Sdh155122 irb_ptr = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 19604714Ssowmini ipst->ips_ip6_cache_table_size)]; 19610Sstevel@tonic-gate rw_enter(&irb_ptr->irb_lock, RW_READER); 19620Sstevel@tonic-gate for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) { 19630Sstevel@tonic-gate if (ire->ire_marks & IRE_MARK_CONDEMNED) 19640Sstevel@tonic-gate continue; 19650Sstevel@tonic-gate 19660Sstevel@tonic-gate ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones)); 19670Sstevel@tonic-gate if (ire_match_args_v6(ire, addr, &ire->ire_mask_v6, gateway, 19681676Sjpk type, ipif, zoneid, 0, tsl, flags)) { 19690Sstevel@tonic-gate IRE_REFHOLD(ire); 19700Sstevel@tonic-gate rw_exit(&irb_ptr->irb_lock); 19710Sstevel@tonic-gate return (ire); 19720Sstevel@tonic-gate } 19730Sstevel@tonic-gate } 19740Sstevel@tonic-gate rw_exit(&irb_ptr->irb_lock); 19750Sstevel@tonic-gate return (NULL); 19760Sstevel@tonic-gate } 19770Sstevel@tonic-gate 19780Sstevel@tonic-gate /* 19790Sstevel@tonic-gate * Lookup cache. Don't return IRE_MARK_HIDDEN entries. Callers 19800Sstevel@tonic-gate * should use ire_ctable_lookup with MATCH_IRE_MARK_HIDDEN to get 19810Sstevel@tonic-gate * to the hidden ones. 19822733Snordmark * 19832733Snordmark * In general the zoneid has to match (where ALL_ZONES match all of them). 19842733Snordmark * But for IRE_LOCAL we also need to handle the case where L2 should 19852733Snordmark * conceptually loop back the packet. This is necessary since neither 19862733Snordmark * Ethernet drivers nor Ethernet hardware loops back packets sent to their 19872733Snordmark * own MAC address. This loopback is needed when the normal 19882733Snordmark * routes (ignoring IREs with different zoneids) would send out the packet on 19892733Snordmark * the same ill (or ill group) as the ill with which this IRE_LOCAL is 19902733Snordmark * associated. 19912733Snordmark * 19922733Snordmark * Earlier versions of this code always matched an IRE_LOCAL independently of 19932733Snordmark * the zoneid. We preserve that earlier behavior when 19942733Snordmark * ip_restrict_interzone_loopback is turned off. 19950Sstevel@tonic-gate */ 19960Sstevel@tonic-gate ire_t * 19971676Sjpk ire_cache_lookup_v6(const in6_addr_t *addr, zoneid_t zoneid, 19983448Sdh155122 const ts_label_t *tsl, ip_stack_t *ipst) 19990Sstevel@tonic-gate { 20000Sstevel@tonic-gate irb_t *irb_ptr; 20010Sstevel@tonic-gate ire_t *ire; 20020Sstevel@tonic-gate 20033448Sdh155122 irb_ptr = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 20043448Sdh155122 ipst->ips_ip6_cache_table_size)]; 20050Sstevel@tonic-gate rw_enter(&irb_ptr->irb_lock, RW_READER); 20060Sstevel@tonic-gate for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) { 20070Sstevel@tonic-gate if (ire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN)) 20080Sstevel@tonic-gate continue; 20090Sstevel@tonic-gate if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, addr)) { 20101676Sjpk /* 20111676Sjpk * Finally, check if the security policy has any 20121676Sjpk * restriction on using this route for the specified 20131676Sjpk * message. 20141676Sjpk */ 20151676Sjpk if (tsl != NULL && 20161676Sjpk ire->ire_gw_secattr != NULL && 20171676Sjpk tsol_ire_match_gwattr(ire, tsl) != 0) { 20181676Sjpk continue; 20191676Sjpk } 20201676Sjpk 20210Sstevel@tonic-gate if (zoneid == ALL_ZONES || ire->ire_zoneid == zoneid || 20222733Snordmark ire->ire_zoneid == ALL_ZONES) { 20232733Snordmark IRE_REFHOLD(ire); 20242733Snordmark rw_exit(&irb_ptr->irb_lock); 20252733Snordmark return (ire); 20262733Snordmark } 20272733Snordmark 20282733Snordmark if (ire->ire_type == IRE_LOCAL) { 20293448Sdh155122 if (ipst->ips_ip_restrict_interzone_loopback && 20302733Snordmark !ire_local_ok_across_zones(ire, zoneid, 20313448Sdh155122 (void *)addr, tsl, ipst)) 20322733Snordmark continue; 20332733Snordmark 20340Sstevel@tonic-gate IRE_REFHOLD(ire); 20350Sstevel@tonic-gate rw_exit(&irb_ptr->irb_lock); 20360Sstevel@tonic-gate return (ire); 20370Sstevel@tonic-gate } 20380Sstevel@tonic-gate } 20390Sstevel@tonic-gate } 20400Sstevel@tonic-gate rw_exit(&irb_ptr->irb_lock); 20410Sstevel@tonic-gate return (NULL); 20420Sstevel@tonic-gate } 20430Sstevel@tonic-gate 20440Sstevel@tonic-gate /* 20450Sstevel@tonic-gate * Locate the interface ire that is tied to the cache ire 'cire' via 20460Sstevel@tonic-gate * cire->ire_ihandle. 20470Sstevel@tonic-gate * 20480Sstevel@tonic-gate * We are trying to create the cache ire for an onlink destn. or 20490Sstevel@tonic-gate * gateway in 'cire'. We are called from ire_add_v6() in the IRE_IF_RESOLVER 20500Sstevel@tonic-gate * case for xresolv interfaces, after the ire has come back from 20510Sstevel@tonic-gate * an external resolver. 20520Sstevel@tonic-gate */ 20530Sstevel@tonic-gate static ire_t * 20540Sstevel@tonic-gate ire_ihandle_lookup_onlink_v6(ire_t *cire) 20550Sstevel@tonic-gate { 20560Sstevel@tonic-gate ire_t *ire; 20570Sstevel@tonic-gate int match_flags; 20580Sstevel@tonic-gate int i; 20590Sstevel@tonic-gate int j; 20600Sstevel@tonic-gate irb_t *irb_ptr; 20613448Sdh155122 ip_stack_t *ipst = cire->ire_ipst; 20620Sstevel@tonic-gate 20630Sstevel@tonic-gate ASSERT(cire != NULL); 20640Sstevel@tonic-gate 20650Sstevel@tonic-gate match_flags = MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK; 20660Sstevel@tonic-gate /* 20670Sstevel@tonic-gate * We know that the mask of the interface ire equals cire->ire_cmask. 20680Sstevel@tonic-gate * (When ip_newroute_v6() created 'cire' for an on-link destn. 20690Sstevel@tonic-gate * it set its cmask from the interface ire's mask) 20700Sstevel@tonic-gate */ 20710Sstevel@tonic-gate ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 20720Sstevel@tonic-gate NULL, IRE_INTERFACE, NULL, NULL, ALL_ZONES, cire->ire_ihandle, 20733448Sdh155122 NULL, match_flags, ipst); 20740Sstevel@tonic-gate if (ire != NULL) 20750Sstevel@tonic-gate return (ire); 20760Sstevel@tonic-gate /* 20770Sstevel@tonic-gate * If we didn't find an interface ire above, we can't declare failure. 20780Sstevel@tonic-gate * For backwards compatibility, we need to support prefix routes 20790Sstevel@tonic-gate * pointing to next hop gateways that are not on-link. 20800Sstevel@tonic-gate * 20810Sstevel@tonic-gate * In the resolver/noresolver case, ip_newroute_v6() thinks 20820Sstevel@tonic-gate * it is creating the cache ire for an onlink destination in 'cire'. 20830Sstevel@tonic-gate * But 'cire' is not actually onlink, because ire_ftable_lookup_v6() 20840Sstevel@tonic-gate * cheated it, by doing ire_route_lookup_v6() twice and returning an 20850Sstevel@tonic-gate * interface ire. 20860Sstevel@tonic-gate * 20870Sstevel@tonic-gate * Eg. default - gw1 (line 1) 20880Sstevel@tonic-gate * gw1 - gw2 (line 2) 20890Sstevel@tonic-gate * gw2 - hme0 (line 3) 20900Sstevel@tonic-gate * 20910Sstevel@tonic-gate * In the above example, ip_newroute_v6() tried to create the cache ire 20920Sstevel@tonic-gate * 'cire' for gw1, based on the interface route in line 3. The 20930Sstevel@tonic-gate * ire_ftable_lookup_v6() above fails, because there is 20940Sstevel@tonic-gate * no interface route to reach gw1. (it is gw2). We fall thru below. 20950Sstevel@tonic-gate * 20960Sstevel@tonic-gate * Do a brute force search based on the ihandle in a subset of the 20970Sstevel@tonic-gate * forwarding tables, corresponding to cire->ire_cmask_v6. Otherwise 20980Sstevel@tonic-gate * things become very complex, since we don't have 'pire' in this 20990Sstevel@tonic-gate * case. (Also note that this method is not possible in the offlink 21000Sstevel@tonic-gate * case because we don't know the mask) 21010Sstevel@tonic-gate */ 21020Sstevel@tonic-gate i = ip_mask_to_plen_v6(&cire->ire_cmask_v6); 21033448Sdh155122 if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL) 21040Sstevel@tonic-gate return (NULL); 21053448Sdh155122 for (j = 0; j < ipst->ips_ip6_ftable_hash_size; j++) { 21063448Sdh155122 irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][j]; 21070Sstevel@tonic-gate rw_enter(&irb_ptr->irb_lock, RW_READER); 21080Sstevel@tonic-gate for (ire = irb_ptr->irb_ire; ire != NULL; 21090Sstevel@tonic-gate ire = ire->ire_next) { 21100Sstevel@tonic-gate if (ire->ire_marks & IRE_MARK_CONDEMNED) 21110Sstevel@tonic-gate continue; 21120Sstevel@tonic-gate if ((ire->ire_type & IRE_INTERFACE) && 21130Sstevel@tonic-gate (ire->ire_ihandle == cire->ire_ihandle)) { 21140Sstevel@tonic-gate IRE_REFHOLD(ire); 21150Sstevel@tonic-gate rw_exit(&irb_ptr->irb_lock); 21160Sstevel@tonic-gate return (ire); 21170Sstevel@tonic-gate } 21180Sstevel@tonic-gate } 21190Sstevel@tonic-gate rw_exit(&irb_ptr->irb_lock); 21200Sstevel@tonic-gate } 21210Sstevel@tonic-gate return (NULL); 21220Sstevel@tonic-gate } 21230Sstevel@tonic-gate 21240Sstevel@tonic-gate 21250Sstevel@tonic-gate /* 21260Sstevel@tonic-gate * Locate the interface ire that is tied to the cache ire 'cire' via 21270Sstevel@tonic-gate * cire->ire_ihandle. 21280Sstevel@tonic-gate * 21290Sstevel@tonic-gate * We are trying to create the cache ire for an offlink destn based 21300Sstevel@tonic-gate * on the cache ire of the gateway in 'cire'. 'pire' is the prefix ire 21310Sstevel@tonic-gate * as found by ip_newroute_v6(). We are called from ip_newroute_v6() in 21320Sstevel@tonic-gate * the IRE_CACHE case. 21330Sstevel@tonic-gate */ 21340Sstevel@tonic-gate ire_t * 21350Sstevel@tonic-gate ire_ihandle_lookup_offlink_v6(ire_t *cire, ire_t *pire) 21360Sstevel@tonic-gate { 21370Sstevel@tonic-gate ire_t *ire; 21380Sstevel@tonic-gate int match_flags; 21390Sstevel@tonic-gate in6_addr_t gw_addr; 21400Sstevel@tonic-gate ipif_t *gw_ipif; 21413448Sdh155122 ip_stack_t *ipst = cire->ire_ipst; 21420Sstevel@tonic-gate 21430Sstevel@tonic-gate ASSERT(cire != NULL && pire != NULL); 21440Sstevel@tonic-gate 21450Sstevel@tonic-gate match_flags = MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK; 21460Sstevel@tonic-gate /* 21470Sstevel@tonic-gate * ip_newroute_v6 calls ire_ftable_lookup with MATCH_IRE_ILL only 21480Sstevel@tonic-gate * for on-link hosts. We should never be here for onlink. 21490Sstevel@tonic-gate * Thus, use MATCH_IRE_ILL_GROUP. 21500Sstevel@tonic-gate */ 21510Sstevel@tonic-gate if (pire->ire_ipif != NULL) 21520Sstevel@tonic-gate match_flags |= MATCH_IRE_ILL_GROUP; 21530Sstevel@tonic-gate /* 21540Sstevel@tonic-gate * We know that the mask of the interface ire equals cire->ire_cmask. 21550Sstevel@tonic-gate * (When ip_newroute_v6() created 'cire' for an on-link destn. it set 21560Sstevel@tonic-gate * its cmask from the interface ire's mask) 21570Sstevel@tonic-gate */ 21580Sstevel@tonic-gate ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 0, 21590Sstevel@tonic-gate IRE_INTERFACE, pire->ire_ipif, NULL, ALL_ZONES, cire->ire_ihandle, 21603448Sdh155122 NULL, match_flags, ipst); 21610Sstevel@tonic-gate if (ire != NULL) 21620Sstevel@tonic-gate return (ire); 21630Sstevel@tonic-gate /* 21640Sstevel@tonic-gate * If we didn't find an interface ire above, we can't declare failure. 21650Sstevel@tonic-gate * For backwards compatibility, we need to support prefix routes 21660Sstevel@tonic-gate * pointing to next hop gateways that are not on-link. 21670Sstevel@tonic-gate * 21680Sstevel@tonic-gate * Assume we are trying to ping some offlink destn, and we have the 21690Sstevel@tonic-gate * routing table below. 21700Sstevel@tonic-gate * 21710Sstevel@tonic-gate * Eg. default - gw1 <--- pire (line 1) 21720Sstevel@tonic-gate * gw1 - gw2 (line 2) 21730Sstevel@tonic-gate * gw2 - hme0 (line 3) 21740Sstevel@tonic-gate * 21750Sstevel@tonic-gate * If we already have a cache ire for gw1 in 'cire', the 21760Sstevel@tonic-gate * ire_ftable_lookup_v6 above would have failed, since there is no 21770Sstevel@tonic-gate * interface ire to reach gw1. We will fallthru below. 21780Sstevel@tonic-gate * 21790Sstevel@tonic-gate * Here we duplicate the steps that ire_ftable_lookup_v6() did in 21800Sstevel@tonic-gate * getting 'cire' from 'pire', in the MATCH_IRE_RECURSIVE case. 21810Sstevel@tonic-gate * The differences are the following 21820Sstevel@tonic-gate * i. We want the interface ire only, so we call 21830Sstevel@tonic-gate * ire_ftable_lookup_v6() instead of ire_route_lookup_v6() 21840Sstevel@tonic-gate * ii. We look for only prefix routes in the 1st call below. 21850Sstevel@tonic-gate * ii. We want to match on the ihandle in the 2nd call below. 21860Sstevel@tonic-gate */ 21870Sstevel@tonic-gate match_flags = MATCH_IRE_TYPE; 21880Sstevel@tonic-gate if (pire->ire_ipif != NULL) 21890Sstevel@tonic-gate match_flags |= MATCH_IRE_ILL_GROUP; 21900Sstevel@tonic-gate 21910Sstevel@tonic-gate mutex_enter(&pire->ire_lock); 21920Sstevel@tonic-gate gw_addr = pire->ire_gateway_addr_v6; 21930Sstevel@tonic-gate mutex_exit(&pire->ire_lock); 21940Sstevel@tonic-gate ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_OFFSUBNET, 21953448Sdh155122 pire->ire_ipif, NULL, ALL_ZONES, 0, NULL, match_flags, ipst); 21960Sstevel@tonic-gate if (ire == NULL) 21970Sstevel@tonic-gate return (NULL); 21980Sstevel@tonic-gate /* 21990Sstevel@tonic-gate * At this point 'ire' corresponds to the entry shown in line 2. 22000Sstevel@tonic-gate * gw_addr is 'gw2' in the example above. 22010Sstevel@tonic-gate */ 22020Sstevel@tonic-gate mutex_enter(&ire->ire_lock); 22030Sstevel@tonic-gate gw_addr = ire->ire_gateway_addr_v6; 22040Sstevel@tonic-gate mutex_exit(&ire->ire_lock); 22050Sstevel@tonic-gate gw_ipif = ire->ire_ipif; 22060Sstevel@tonic-gate ire_refrele(ire); 22070Sstevel@tonic-gate 22080Sstevel@tonic-gate match_flags |= MATCH_IRE_IHANDLE; 22090Sstevel@tonic-gate ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_INTERFACE, 22101676Sjpk gw_ipif, NULL, ALL_ZONES, cire->ire_ihandle, 22113448Sdh155122 NULL, match_flags, ipst); 22120Sstevel@tonic-gate return (ire); 22130Sstevel@tonic-gate } 22140Sstevel@tonic-gate 22150Sstevel@tonic-gate /* 22160Sstevel@tonic-gate * Return the IRE_LOOPBACK, IRE_IF_RESOLVER or IRE_IF_NORESOLVER 22170Sstevel@tonic-gate * ire associated with the specified ipif. 22180Sstevel@tonic-gate * 22190Sstevel@tonic-gate * This might occasionally be called when IPIF_UP is not set since 22200Sstevel@tonic-gate * the IPV6_MULTICAST_IF as well as creating interface routes 22210Sstevel@tonic-gate * allows specifying a down ipif (ipif_lookup* match ipifs that are down). 22220Sstevel@tonic-gate * 22230Sstevel@tonic-gate * Note that if IPIF_NOLOCAL, IPIF_NOXMIT, or IPIF_DEPRECATED is set on 22240Sstevel@tonic-gate * the ipif this routine might return NULL. 22250Sstevel@tonic-gate * (Sometimes called as writer though not required by this function.) 22260Sstevel@tonic-gate */ 22270Sstevel@tonic-gate ire_t * 22281676Sjpk ipif_to_ire_v6(const ipif_t *ipif) 22290Sstevel@tonic-gate { 22300Sstevel@tonic-gate ire_t *ire; 22313448Sdh155122 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 22320Sstevel@tonic-gate 22330Sstevel@tonic-gate ASSERT(ipif->ipif_isv6); 22340Sstevel@tonic-gate if (ipif->ipif_ire_type == IRE_LOOPBACK) { 22350Sstevel@tonic-gate ire = ire_ctable_lookup_v6(&ipif->ipif_v6lcl_addr, NULL, 22361676Sjpk IRE_LOOPBACK, ipif, ALL_ZONES, NULL, 22373448Sdh155122 (MATCH_IRE_TYPE | MATCH_IRE_IPIF), ipst); 22380Sstevel@tonic-gate } else if (ipif->ipif_flags & IPIF_POINTOPOINT) { 22390Sstevel@tonic-gate /* In this case we need to lookup destination address. */ 22400Sstevel@tonic-gate ire = ire_ftable_lookup_v6(&ipif->ipif_v6pp_dst_addr, 22410Sstevel@tonic-gate &ipv6_all_ones, NULL, IRE_INTERFACE, ipif, NULL, ALL_ZONES, 22421676Sjpk 0, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF | 22433448Sdh155122 MATCH_IRE_MASK), ipst); 22440Sstevel@tonic-gate } else { 22450Sstevel@tonic-gate ire = ire_ftable_lookup_v6(&ipif->ipif_v6subnet, 22460Sstevel@tonic-gate &ipif->ipif_v6net_mask, NULL, IRE_INTERFACE, ipif, NULL, 22471676Sjpk ALL_ZONES, 0, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF | 22483448Sdh155122 MATCH_IRE_MASK), ipst); 22490Sstevel@tonic-gate } 22500Sstevel@tonic-gate return (ire); 22510Sstevel@tonic-gate } 22520Sstevel@tonic-gate 22530Sstevel@tonic-gate /* 22540Sstevel@tonic-gate * Return B_TRUE if a multirt route is resolvable 22550Sstevel@tonic-gate * (or if no route is resolved yet), B_FALSE otherwise. 22560Sstevel@tonic-gate * This only works in the global zone. 22570Sstevel@tonic-gate */ 22580Sstevel@tonic-gate boolean_t 22593448Sdh155122 ire_multirt_need_resolve_v6(const in6_addr_t *v6dstp, const ts_label_t *tsl, 22603448Sdh155122 ip_stack_t *ipst) 22610Sstevel@tonic-gate { 22620Sstevel@tonic-gate ire_t *first_fire; 22630Sstevel@tonic-gate ire_t *first_cire; 22640Sstevel@tonic-gate ire_t *fire; 22650Sstevel@tonic-gate ire_t *cire; 22660Sstevel@tonic-gate irb_t *firb; 22670Sstevel@tonic-gate irb_t *cirb; 22680Sstevel@tonic-gate int unres_cnt = 0; 22690Sstevel@tonic-gate boolean_t resolvable = B_FALSE; 22700Sstevel@tonic-gate 22710Sstevel@tonic-gate /* Retrieve the first IRE_HOST that matches the destination */ 22720Sstevel@tonic-gate first_fire = ire_ftable_lookup_v6(v6dstp, &ipv6_all_ones, 0, IRE_HOST, 22731676Sjpk NULL, NULL, ALL_ZONES, 0, tsl, MATCH_IRE_MASK | MATCH_IRE_TYPE | 22743448Sdh155122 MATCH_IRE_SECATTR, ipst); 22750Sstevel@tonic-gate 22760Sstevel@tonic-gate /* No route at all */ 22770Sstevel@tonic-gate if (first_fire == NULL) { 22780Sstevel@tonic-gate return (B_TRUE); 22790Sstevel@tonic-gate } 22800Sstevel@tonic-gate 22810Sstevel@tonic-gate firb = first_fire->ire_bucket; 22820Sstevel@tonic-gate ASSERT(firb); 22830Sstevel@tonic-gate 22840Sstevel@tonic-gate /* Retrieve the first IRE_CACHE ire for that destination. */ 22853448Sdh155122 first_cire = ire_cache_lookup_v6(v6dstp, GLOBAL_ZONEID, tsl, ipst); 22860Sstevel@tonic-gate 22870Sstevel@tonic-gate /* No resolved route. */ 22880Sstevel@tonic-gate if (first_cire == NULL) { 22890Sstevel@tonic-gate ire_refrele(first_fire); 22900Sstevel@tonic-gate return (B_TRUE); 22910Sstevel@tonic-gate } 22920Sstevel@tonic-gate 22930Sstevel@tonic-gate /* At least one route is resolved. */ 22940Sstevel@tonic-gate 22950Sstevel@tonic-gate cirb = first_cire->ire_bucket; 22960Sstevel@tonic-gate ASSERT(cirb); 22970Sstevel@tonic-gate 22980Sstevel@tonic-gate /* Count the number of routes to that dest that are declared. */ 22990Sstevel@tonic-gate IRB_REFHOLD(firb); 23000Sstevel@tonic-gate for (fire = first_fire; fire != NULL; fire = fire->ire_next) { 23010Sstevel@tonic-gate if (!(fire->ire_flags & RTF_MULTIRT)) 23020Sstevel@tonic-gate continue; 23030Sstevel@tonic-gate if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, v6dstp)) 23040Sstevel@tonic-gate continue; 23050Sstevel@tonic-gate unres_cnt++; 23060Sstevel@tonic-gate } 23070Sstevel@tonic-gate IRB_REFRELE(firb); 23080Sstevel@tonic-gate 23090Sstevel@tonic-gate 23100Sstevel@tonic-gate /* Then subtract the number of routes to that dst that are resolved */ 23110Sstevel@tonic-gate IRB_REFHOLD(cirb); 23120Sstevel@tonic-gate for (cire = first_cire; cire != NULL; cire = cire->ire_next) { 23134714Ssowmini if (!(cire->ire_flags & RTF_MULTIRT)) 23144714Ssowmini continue; 23154714Ssowmini if (!IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, v6dstp)) 23164714Ssowmini continue; 23174714Ssowmini if (cire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN)) 23184714Ssowmini continue; 23194714Ssowmini unres_cnt--; 23200Sstevel@tonic-gate } 23210Sstevel@tonic-gate IRB_REFRELE(cirb); 23220Sstevel@tonic-gate 23230Sstevel@tonic-gate /* At least one route is unresolved; search for a resolvable route. */ 23240Sstevel@tonic-gate if (unres_cnt > 0) 23250Sstevel@tonic-gate resolvable = ire_multirt_lookup_v6(&first_cire, &first_fire, 23263448Sdh155122 MULTIRT_USESTAMP|MULTIRT_CACHEGW, tsl, ipst); 23270Sstevel@tonic-gate 23280Sstevel@tonic-gate if (first_fire) 23290Sstevel@tonic-gate ire_refrele(first_fire); 23300Sstevel@tonic-gate 23310Sstevel@tonic-gate if (first_cire) 23320Sstevel@tonic-gate ire_refrele(first_cire); 23330Sstevel@tonic-gate 23340Sstevel@tonic-gate return (resolvable); 23350Sstevel@tonic-gate } 23360Sstevel@tonic-gate 23370Sstevel@tonic-gate 23380Sstevel@tonic-gate /* 23390Sstevel@tonic-gate * Return B_TRUE and update *ire_arg and *fire_arg 23400Sstevel@tonic-gate * if at least one resolvable route is found. 23410Sstevel@tonic-gate * Return B_FALSE otherwise (all routes are resolved or 23420Sstevel@tonic-gate * the remaining unresolved routes are all unresolvable). 23430Sstevel@tonic-gate * This only works in the global zone. 23440Sstevel@tonic-gate */ 23450Sstevel@tonic-gate boolean_t 23461676Sjpk ire_multirt_lookup_v6(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags, 23473448Sdh155122 const ts_label_t *tsl, ip_stack_t *ipst) 23480Sstevel@tonic-gate { 23490Sstevel@tonic-gate clock_t delta; 23500Sstevel@tonic-gate ire_t *best_fire = NULL; 23510Sstevel@tonic-gate ire_t *best_cire = NULL; 23520Sstevel@tonic-gate ire_t *first_fire; 23530Sstevel@tonic-gate ire_t *first_cire; 23540Sstevel@tonic-gate ire_t *fire; 23550Sstevel@tonic-gate ire_t *cire; 23560Sstevel@tonic-gate irb_t *firb = NULL; 23570Sstevel@tonic-gate irb_t *cirb = NULL; 23580Sstevel@tonic-gate ire_t *gw_ire; 23590Sstevel@tonic-gate boolean_t already_resolved; 23600Sstevel@tonic-gate boolean_t res; 23610Sstevel@tonic-gate in6_addr_t v6dst; 23620Sstevel@tonic-gate in6_addr_t v6gw; 23630Sstevel@tonic-gate 23640Sstevel@tonic-gate ip2dbg(("ire_multirt_lookup_v6: *ire_arg %p, *fire_arg %p, " 23650Sstevel@tonic-gate "flags %04x\n", (void *)*ire_arg, (void *)*fire_arg, flags)); 23660Sstevel@tonic-gate 23670Sstevel@tonic-gate ASSERT(ire_arg); 23680Sstevel@tonic-gate ASSERT(fire_arg); 23690Sstevel@tonic-gate 23700Sstevel@tonic-gate /* Not an IRE_HOST ire; give up. */ 23710Sstevel@tonic-gate if ((*fire_arg == NULL) || 23720Sstevel@tonic-gate ((*fire_arg)->ire_type != IRE_HOST)) { 23730Sstevel@tonic-gate return (B_FALSE); 23740Sstevel@tonic-gate } 23750Sstevel@tonic-gate 23760Sstevel@tonic-gate /* This is the first IRE_HOST ire for that destination. */ 23770Sstevel@tonic-gate first_fire = *fire_arg; 23780Sstevel@tonic-gate firb = first_fire->ire_bucket; 23790Sstevel@tonic-gate ASSERT(firb); 23800Sstevel@tonic-gate 23810Sstevel@tonic-gate mutex_enter(&first_fire->ire_lock); 23820Sstevel@tonic-gate v6dst = first_fire->ire_addr_v6; 23830Sstevel@tonic-gate mutex_exit(&first_fire->ire_lock); 23840Sstevel@tonic-gate 23850Sstevel@tonic-gate ip2dbg(("ire_multirt_lookup_v6: dst %08x\n", 23860Sstevel@tonic-gate ntohl(V4_PART_OF_V6(v6dst)))); 23870Sstevel@tonic-gate 23880Sstevel@tonic-gate /* 23890Sstevel@tonic-gate * Retrieve the first IRE_CACHE ire for that destination; 23900Sstevel@tonic-gate * if we don't find one, no route for that dest is 23910Sstevel@tonic-gate * resolved yet. 23920Sstevel@tonic-gate */ 23933448Sdh155122 first_cire = ire_cache_lookup_v6(&v6dst, GLOBAL_ZONEID, tsl, ipst); 23940Sstevel@tonic-gate if (first_cire) { 23950Sstevel@tonic-gate cirb = first_cire->ire_bucket; 23960Sstevel@tonic-gate } 23970Sstevel@tonic-gate 23980Sstevel@tonic-gate ip2dbg(("ire_multirt_lookup_v6: first_cire %p\n", (void *)first_cire)); 23990Sstevel@tonic-gate 24000Sstevel@tonic-gate /* 24010Sstevel@tonic-gate * Search for a resolvable route, giving the top priority 24020Sstevel@tonic-gate * to routes that can be resolved without any call to the resolver. 24030Sstevel@tonic-gate */ 24040Sstevel@tonic-gate IRB_REFHOLD(firb); 24050Sstevel@tonic-gate 24060Sstevel@tonic-gate if (!IN6_IS_ADDR_MULTICAST(&v6dst)) { 24070Sstevel@tonic-gate /* 24080Sstevel@tonic-gate * For all multiroute IRE_HOST ires for that destination, 24090Sstevel@tonic-gate * check if the route via the IRE_HOST's gateway is 24100Sstevel@tonic-gate * resolved yet. 24110Sstevel@tonic-gate */ 24120Sstevel@tonic-gate for (fire = first_fire; fire != NULL; fire = fire->ire_next) { 24130Sstevel@tonic-gate 24140Sstevel@tonic-gate if (!(fire->ire_flags & RTF_MULTIRT)) 24150Sstevel@tonic-gate continue; 24160Sstevel@tonic-gate if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst)) 24170Sstevel@tonic-gate continue; 24180Sstevel@tonic-gate 24191676Sjpk if (fire->ire_gw_secattr != NULL && 24201676Sjpk tsol_ire_match_gwattr(fire, tsl) != 0) { 24211676Sjpk continue; 24221676Sjpk } 24231676Sjpk 24240Sstevel@tonic-gate mutex_enter(&fire->ire_lock); 24250Sstevel@tonic-gate v6gw = fire->ire_gateway_addr_v6; 24260Sstevel@tonic-gate mutex_exit(&fire->ire_lock); 24270Sstevel@tonic-gate 24280Sstevel@tonic-gate ip2dbg(("ire_multirt_lookup_v6: fire %p, " 24290Sstevel@tonic-gate "ire_addr %08x, ire_gateway_addr %08x\n", 24300Sstevel@tonic-gate (void *)fire, 24310Sstevel@tonic-gate ntohl(V4_PART_OF_V6(fire->ire_addr_v6)), 24320Sstevel@tonic-gate ntohl(V4_PART_OF_V6(v6gw)))); 24330Sstevel@tonic-gate 24340Sstevel@tonic-gate already_resolved = B_FALSE; 24350Sstevel@tonic-gate 24360Sstevel@tonic-gate if (first_cire) { 24370Sstevel@tonic-gate ASSERT(cirb); 24380Sstevel@tonic-gate 24390Sstevel@tonic-gate IRB_REFHOLD(cirb); 24400Sstevel@tonic-gate /* 24410Sstevel@tonic-gate * For all IRE_CACHE ires for that 24420Sstevel@tonic-gate * destination. 24430Sstevel@tonic-gate */ 24440Sstevel@tonic-gate for (cire = first_cire; 24450Sstevel@tonic-gate cire != NULL; 24460Sstevel@tonic-gate cire = cire->ire_next) { 24470Sstevel@tonic-gate 24480Sstevel@tonic-gate if (!(cire->ire_flags & RTF_MULTIRT)) 24490Sstevel@tonic-gate continue; 24500Sstevel@tonic-gate if (!IN6_ARE_ADDR_EQUAL( 24510Sstevel@tonic-gate &cire->ire_addr_v6, &v6dst)) 24520Sstevel@tonic-gate continue; 24530Sstevel@tonic-gate if (cire->ire_marks & 24540Sstevel@tonic-gate (IRE_MARK_CONDEMNED| 24554714Ssowmini IRE_MARK_HIDDEN)) 24560Sstevel@tonic-gate continue; 24571676Sjpk 24581676Sjpk if (cire->ire_gw_secattr != NULL && 24591676Sjpk tsol_ire_match_gwattr(cire, 24601676Sjpk tsl) != 0) { 24611676Sjpk continue; 24621676Sjpk } 24631676Sjpk 24640Sstevel@tonic-gate /* 24650Sstevel@tonic-gate * Check if the IRE_CACHE's gateway 24660Sstevel@tonic-gate * matches the IRE_HOST's gateway. 24670Sstevel@tonic-gate */ 24680Sstevel@tonic-gate if (IN6_ARE_ADDR_EQUAL( 24690Sstevel@tonic-gate &cire->ire_gateway_addr_v6, 24700Sstevel@tonic-gate &v6gw)) { 24710Sstevel@tonic-gate already_resolved = B_TRUE; 24720Sstevel@tonic-gate break; 24730Sstevel@tonic-gate } 24740Sstevel@tonic-gate } 24750Sstevel@tonic-gate IRB_REFRELE(cirb); 24760Sstevel@tonic-gate } 24770Sstevel@tonic-gate 24780Sstevel@tonic-gate /* 24790Sstevel@tonic-gate * This route is already resolved; 24800Sstevel@tonic-gate * proceed with next one. 24810Sstevel@tonic-gate */ 24820Sstevel@tonic-gate if (already_resolved) { 24830Sstevel@tonic-gate ip2dbg(("ire_multirt_lookup_v6: found cire %p, " 24840Sstevel@tonic-gate "already resolved\n", (void *)cire)); 24850Sstevel@tonic-gate continue; 24860Sstevel@tonic-gate } 24870Sstevel@tonic-gate 24880Sstevel@tonic-gate /* 24890Sstevel@tonic-gate * The route is unresolved; is it actually 24900Sstevel@tonic-gate * resolvable, i.e. is there a cache or a resolver 24910Sstevel@tonic-gate * for the gateway? 24920Sstevel@tonic-gate */ 24930Sstevel@tonic-gate gw_ire = ire_route_lookup_v6(&v6gw, 0, 0, 0, NULL, NULL, 24941676Sjpk ALL_ZONES, tsl, MATCH_IRE_RECURSIVE | 24953448Sdh155122 MATCH_IRE_SECATTR, ipst); 24960Sstevel@tonic-gate 24970Sstevel@tonic-gate ip2dbg(("ire_multirt_lookup_v6: looked up gw_ire %p\n", 24980Sstevel@tonic-gate (void *)gw_ire)); 24990Sstevel@tonic-gate 25000Sstevel@tonic-gate /* 25010Sstevel@tonic-gate * This route can be resolved without any call to the 25020Sstevel@tonic-gate * resolver; if the MULTIRT_CACHEGW flag is set, 25030Sstevel@tonic-gate * give the top priority to this ire and exit the 25040Sstevel@tonic-gate * loop. 25050Sstevel@tonic-gate * This occurs when an resolver reply is processed 25060Sstevel@tonic-gate * through ip_wput_nondata() 25070Sstevel@tonic-gate */ 25080Sstevel@tonic-gate if ((flags & MULTIRT_CACHEGW) && 25090Sstevel@tonic-gate (gw_ire != NULL) && 25100Sstevel@tonic-gate (gw_ire->ire_type & IRE_CACHETABLE)) { 25110Sstevel@tonic-gate /* 25120Sstevel@tonic-gate * Release the resolver associated to the 25130Sstevel@tonic-gate * previous candidate best ire, if any. 25140Sstevel@tonic-gate */ 25150Sstevel@tonic-gate if (best_cire) { 25160Sstevel@tonic-gate ire_refrele(best_cire); 25170Sstevel@tonic-gate ASSERT(best_fire); 25180Sstevel@tonic-gate } 25190Sstevel@tonic-gate 25200Sstevel@tonic-gate best_fire = fire; 25210Sstevel@tonic-gate best_cire = gw_ire; 25220Sstevel@tonic-gate 25230Sstevel@tonic-gate ip2dbg(("ire_multirt_lookup_v6: found top prio " 25240Sstevel@tonic-gate "best_fire %p, best_cire %p\n", 25250Sstevel@tonic-gate (void *)best_fire, (void *)best_cire)); 25260Sstevel@tonic-gate break; 25270Sstevel@tonic-gate } 25280Sstevel@tonic-gate 25290Sstevel@tonic-gate /* 25300Sstevel@tonic-gate * Compute the time elapsed since our preceding 25310Sstevel@tonic-gate * attempt to resolve that route. 25320Sstevel@tonic-gate * If the MULTIRT_USESTAMP flag is set, we take that 25330Sstevel@tonic-gate * route into account only if this time interval 25340Sstevel@tonic-gate * exceeds ip_multirt_resolution_interval; 25350Sstevel@tonic-gate * this prevents us from attempting to resolve a 25360Sstevel@tonic-gate * broken route upon each sending of a packet. 25370Sstevel@tonic-gate */ 25380Sstevel@tonic-gate delta = lbolt - fire->ire_last_used_time; 25390Sstevel@tonic-gate delta = TICK_TO_MSEC(delta); 25400Sstevel@tonic-gate 25410Sstevel@tonic-gate res = (boolean_t) 25423448Sdh155122 ((delta > ipst-> 25434714Ssowmini ips_ip_multirt_resolution_interval) || 25443448Sdh155122 (!(flags & MULTIRT_USESTAMP))); 25450Sstevel@tonic-gate 25460Sstevel@tonic-gate ip2dbg(("ire_multirt_lookup_v6: fire %p, delta %lu, " 25470Sstevel@tonic-gate "res %d\n", 25480Sstevel@tonic-gate (void *)fire, delta, res)); 25490Sstevel@tonic-gate 25500Sstevel@tonic-gate if (res) { 25510Sstevel@tonic-gate /* 25520Sstevel@tonic-gate * A resolver exists for the gateway: save 25530Sstevel@tonic-gate * the current IRE_HOST ire as a candidate 25540Sstevel@tonic-gate * best ire. If we later discover that a 25550Sstevel@tonic-gate * top priority ire exists (i.e. no need to 25560Sstevel@tonic-gate * call the resolver), then this new ire 25570Sstevel@tonic-gate * will be preferred to the current one. 25580Sstevel@tonic-gate */ 25590Sstevel@tonic-gate if (gw_ire != NULL) { 25600Sstevel@tonic-gate if (best_fire == NULL) { 25610Sstevel@tonic-gate ASSERT(best_cire == NULL); 25620Sstevel@tonic-gate 25630Sstevel@tonic-gate best_fire = fire; 25640Sstevel@tonic-gate best_cire = gw_ire; 25650Sstevel@tonic-gate 25660Sstevel@tonic-gate ip2dbg(("ire_multirt_lookup_v6:" 25670Sstevel@tonic-gate "found candidate " 25680Sstevel@tonic-gate "best_fire %p, " 25690Sstevel@tonic-gate "best_cire %p\n", 25700Sstevel@tonic-gate (void *)best_fire, 25710Sstevel@tonic-gate (void *)best_cire)); 25720Sstevel@tonic-gate 25730Sstevel@tonic-gate /* 25740Sstevel@tonic-gate * If MULTIRT_CACHEGW is not 25750Sstevel@tonic-gate * set, we ignore the top 25760Sstevel@tonic-gate * priority ires that can 25770Sstevel@tonic-gate * be resolved without any 25780Sstevel@tonic-gate * call to the resolver; 25790Sstevel@tonic-gate * In that case, there is 25800Sstevel@tonic-gate * actually no need 25810Sstevel@tonic-gate * to continue the loop. 25820Sstevel@tonic-gate */ 25830Sstevel@tonic-gate if (!(flags & 25840Sstevel@tonic-gate MULTIRT_CACHEGW)) { 25850Sstevel@tonic-gate break; 25860Sstevel@tonic-gate } 25870Sstevel@tonic-gate continue; 25880Sstevel@tonic-gate } 25890Sstevel@tonic-gate } else { 25900Sstevel@tonic-gate /* 25910Sstevel@tonic-gate * No resolver for the gateway: the 25920Sstevel@tonic-gate * route is not resolvable. 25930Sstevel@tonic-gate * If the MULTIRT_SETSTAMP flag is 25940Sstevel@tonic-gate * set, we stamp the IRE_HOST ire, 25950Sstevel@tonic-gate * so we will not select it again 25960Sstevel@tonic-gate * during this resolution interval. 25970Sstevel@tonic-gate */ 25980Sstevel@tonic-gate if (flags & MULTIRT_SETSTAMP) 25990Sstevel@tonic-gate fire->ire_last_used_time = 26000Sstevel@tonic-gate lbolt; 26010Sstevel@tonic-gate } 26020Sstevel@tonic-gate } 26030Sstevel@tonic-gate 26040Sstevel@tonic-gate if (gw_ire != NULL) 26050Sstevel@tonic-gate ire_refrele(gw_ire); 26060Sstevel@tonic-gate } 26070Sstevel@tonic-gate } else { /* IN6_IS_ADDR_MULTICAST(&v6dst) */ 26080Sstevel@tonic-gate 26090Sstevel@tonic-gate for (fire = first_fire; 26100Sstevel@tonic-gate fire != NULL; 26110Sstevel@tonic-gate fire = fire->ire_next) { 26120Sstevel@tonic-gate 26130Sstevel@tonic-gate if (!(fire->ire_flags & RTF_MULTIRT)) 26140Sstevel@tonic-gate continue; 26150Sstevel@tonic-gate if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst)) 26160Sstevel@tonic-gate continue; 26170Sstevel@tonic-gate 26181676Sjpk if (fire->ire_gw_secattr != NULL && 26191676Sjpk tsol_ire_match_gwattr(fire, tsl) != 0) { 26201676Sjpk continue; 26211676Sjpk } 26221676Sjpk 26230Sstevel@tonic-gate already_resolved = B_FALSE; 26240Sstevel@tonic-gate 26250Sstevel@tonic-gate mutex_enter(&fire->ire_lock); 26260Sstevel@tonic-gate v6gw = fire->ire_gateway_addr_v6; 26270Sstevel@tonic-gate mutex_exit(&fire->ire_lock); 26280Sstevel@tonic-gate 26290Sstevel@tonic-gate gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0, 26301676Sjpk IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, tsl, 26311676Sjpk MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE | 26323448Sdh155122 MATCH_IRE_SECATTR, ipst); 26330Sstevel@tonic-gate 26340Sstevel@tonic-gate /* No resolver for the gateway; we skip this ire. */ 26350Sstevel@tonic-gate if (gw_ire == NULL) { 26360Sstevel@tonic-gate continue; 26370Sstevel@tonic-gate } 26380Sstevel@tonic-gate 26390Sstevel@tonic-gate if (first_cire) { 26400Sstevel@tonic-gate 26410Sstevel@tonic-gate IRB_REFHOLD(cirb); 26420Sstevel@tonic-gate /* 26430Sstevel@tonic-gate * For all IRE_CACHE ires for that 26440Sstevel@tonic-gate * destination. 26450Sstevel@tonic-gate */ 26460Sstevel@tonic-gate for (cire = first_cire; 26470Sstevel@tonic-gate cire != NULL; 26480Sstevel@tonic-gate cire = cire->ire_next) { 26490Sstevel@tonic-gate 26500Sstevel@tonic-gate if (!(cire->ire_flags & RTF_MULTIRT)) 26510Sstevel@tonic-gate continue; 26520Sstevel@tonic-gate if (!IN6_ARE_ADDR_EQUAL( 26530Sstevel@tonic-gate &cire->ire_addr_v6, &v6dst)) 26540Sstevel@tonic-gate continue; 26550Sstevel@tonic-gate if (cire->ire_marks & 26560Sstevel@tonic-gate (IRE_MARK_CONDEMNED| 26574714Ssowmini IRE_MARK_HIDDEN)) 26580Sstevel@tonic-gate continue; 26591676Sjpk 26601676Sjpk if (cire->ire_gw_secattr != NULL && 26611676Sjpk tsol_ire_match_gwattr(cire, 26621676Sjpk tsl) != 0) { 26631676Sjpk continue; 26641676Sjpk } 26651676Sjpk 26660Sstevel@tonic-gate /* 26670Sstevel@tonic-gate * Cache entries are linked to the 26680Sstevel@tonic-gate * parent routes using the parent handle 26690Sstevel@tonic-gate * (ire_phandle). If no cache entry has 26700Sstevel@tonic-gate * the same handle as fire, fire is 26710Sstevel@tonic-gate * still unresolved. 26720Sstevel@tonic-gate */ 26730Sstevel@tonic-gate ASSERT(cire->ire_phandle != 0); 26740Sstevel@tonic-gate if (cire->ire_phandle == 26750Sstevel@tonic-gate fire->ire_phandle) { 26760Sstevel@tonic-gate already_resolved = B_TRUE; 26770Sstevel@tonic-gate break; 26780Sstevel@tonic-gate } 26790Sstevel@tonic-gate } 26800Sstevel@tonic-gate IRB_REFRELE(cirb); 26810Sstevel@tonic-gate } 26820Sstevel@tonic-gate 26830Sstevel@tonic-gate /* 26840Sstevel@tonic-gate * This route is already resolved; proceed with 26850Sstevel@tonic-gate * next one. 26860Sstevel@tonic-gate */ 26870Sstevel@tonic-gate if (already_resolved) { 26880Sstevel@tonic-gate ire_refrele(gw_ire); 26890Sstevel@tonic-gate continue; 26900Sstevel@tonic-gate } 26910Sstevel@tonic-gate 26920Sstevel@tonic-gate /* 26930Sstevel@tonic-gate * Compute the time elapsed since our preceding 26940Sstevel@tonic-gate * attempt to resolve that route. 26950Sstevel@tonic-gate * If the MULTIRT_USESTAMP flag is set, we take 26960Sstevel@tonic-gate * that route into account only if this time 26970Sstevel@tonic-gate * interval exceeds ip_multirt_resolution_interval; 26980Sstevel@tonic-gate * this prevents us from attempting to resolve a 26990Sstevel@tonic-gate * broken route upon each sending of a packet. 27000Sstevel@tonic-gate */ 27010Sstevel@tonic-gate delta = lbolt - fire->ire_last_used_time; 27020Sstevel@tonic-gate delta = TICK_TO_MSEC(delta); 27030Sstevel@tonic-gate 27040Sstevel@tonic-gate res = (boolean_t) 27053448Sdh155122 ((delta > ipst-> 27064714Ssowmini ips_ip_multirt_resolution_interval) || 27070Sstevel@tonic-gate (!(flags & MULTIRT_USESTAMP))); 27080Sstevel@tonic-gate 27090Sstevel@tonic-gate ip3dbg(("ire_multirt_lookup_v6: fire %p, delta %lx, " 27100Sstevel@tonic-gate "flags %04x, res %d\n", 27110Sstevel@tonic-gate (void *)fire, delta, flags, res)); 27120Sstevel@tonic-gate 27130Sstevel@tonic-gate if (res) { 27140Sstevel@tonic-gate if (best_cire) { 27150Sstevel@tonic-gate /* 27160Sstevel@tonic-gate * Release the resolver associated 27170Sstevel@tonic-gate * to the preceding candidate best 27180Sstevel@tonic-gate * ire, if any. 27190Sstevel@tonic-gate */ 27200Sstevel@tonic-gate ire_refrele(best_cire); 27210Sstevel@tonic-gate ASSERT(best_fire); 27220Sstevel@tonic-gate } 27230Sstevel@tonic-gate best_fire = fire; 27240Sstevel@tonic-gate best_cire = gw_ire; 27250Sstevel@tonic-gate continue; 27260Sstevel@tonic-gate } 27270Sstevel@tonic-gate 27280Sstevel@tonic-gate ire_refrele(gw_ire); 27290Sstevel@tonic-gate } 27300Sstevel@tonic-gate } 27310Sstevel@tonic-gate 27320Sstevel@tonic-gate if (best_fire) { 27330Sstevel@tonic-gate IRE_REFHOLD(best_fire); 27340Sstevel@tonic-gate } 27350Sstevel@tonic-gate IRB_REFRELE(firb); 27360Sstevel@tonic-gate 27370Sstevel@tonic-gate /* Release the first IRE_CACHE we initially looked up, if any. */ 27380Sstevel@tonic-gate if (first_cire) 27390Sstevel@tonic-gate ire_refrele(first_cire); 27400Sstevel@tonic-gate 27410Sstevel@tonic-gate /* Found a resolvable route. */ 27420Sstevel@tonic-gate if (best_fire) { 27430Sstevel@tonic-gate ASSERT(best_cire); 27440Sstevel@tonic-gate 27450Sstevel@tonic-gate if (*fire_arg) 27460Sstevel@tonic-gate ire_refrele(*fire_arg); 27470Sstevel@tonic-gate if (*ire_arg) 27480Sstevel@tonic-gate ire_refrele(*ire_arg); 27490Sstevel@tonic-gate 27500Sstevel@tonic-gate /* 27510Sstevel@tonic-gate * Update the passed arguments with the 27520Sstevel@tonic-gate * resolvable multirt route we found 27530Sstevel@tonic-gate */ 27540Sstevel@tonic-gate *fire_arg = best_fire; 27550Sstevel@tonic-gate *ire_arg = best_cire; 27560Sstevel@tonic-gate 27570Sstevel@tonic-gate ip2dbg(("ire_multirt_lookup_v6: returning B_TRUE, " 27580Sstevel@tonic-gate "*fire_arg %p, *ire_arg %p\n", 27590Sstevel@tonic-gate (void *)best_fire, (void *)best_cire)); 27600Sstevel@tonic-gate 27610Sstevel@tonic-gate return (B_TRUE); 27620Sstevel@tonic-gate } 27630Sstevel@tonic-gate 27640Sstevel@tonic-gate ASSERT(best_cire == NULL); 27650Sstevel@tonic-gate 27660Sstevel@tonic-gate ip2dbg(("ire_multirt_lookup_v6: returning B_FALSE, *fire_arg %p, " 27670Sstevel@tonic-gate "*ire_arg %p\n", 27680Sstevel@tonic-gate (void *)*fire_arg, (void *)*ire_arg)); 27690Sstevel@tonic-gate 27700Sstevel@tonic-gate /* No resolvable route. */ 27710Sstevel@tonic-gate return (B_FALSE); 27720Sstevel@tonic-gate } 27730Sstevel@tonic-gate 27740Sstevel@tonic-gate 27750Sstevel@tonic-gate /* 27760Sstevel@tonic-gate * Find an IRE_OFFSUBNET IRE entry for the multicast address 'v6dstp' 27770Sstevel@tonic-gate * that goes through 'ipif'. As a fallback, a route that goes through 27780Sstevel@tonic-gate * ipif->ipif_ill can be returned. 27790Sstevel@tonic-gate */ 27800Sstevel@tonic-gate ire_t * 27810Sstevel@tonic-gate ipif_lookup_multi_ire_v6(ipif_t *ipif, const in6_addr_t *v6dstp) 27820Sstevel@tonic-gate { 27830Sstevel@tonic-gate ire_t *ire; 27840Sstevel@tonic-gate ire_t *save_ire = NULL; 27850Sstevel@tonic-gate ire_t *gw_ire; 27860Sstevel@tonic-gate irb_t *irb; 27870Sstevel@tonic-gate in6_addr_t v6gw; 27880Sstevel@tonic-gate int match_flags = MATCH_IRE_TYPE | MATCH_IRE_ILL; 27893448Sdh155122 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 27900Sstevel@tonic-gate 27910Sstevel@tonic-gate ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, NULL, NULL, ALL_ZONES, 0, 27923448Sdh155122 NULL, MATCH_IRE_DEFAULT, ipst); 27930Sstevel@tonic-gate 27940Sstevel@tonic-gate if (ire == NULL) 27950Sstevel@tonic-gate return (NULL); 27960Sstevel@tonic-gate 27970Sstevel@tonic-gate irb = ire->ire_bucket; 27980Sstevel@tonic-gate ASSERT(irb); 27990Sstevel@tonic-gate 28000Sstevel@tonic-gate IRB_REFHOLD(irb); 28010Sstevel@tonic-gate ire_refrele(ire); 28020Sstevel@tonic-gate for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 28030Sstevel@tonic-gate if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) || 28041676Sjpk (ipif->ipif_zoneid != ire->ire_zoneid && 28051676Sjpk ire->ire_zoneid != ALL_ZONES)) { 28060Sstevel@tonic-gate continue; 28070Sstevel@tonic-gate } 28080Sstevel@tonic-gate 28090Sstevel@tonic-gate switch (ire->ire_type) { 28100Sstevel@tonic-gate case IRE_DEFAULT: 28110Sstevel@tonic-gate case IRE_PREFIX: 28120Sstevel@tonic-gate case IRE_HOST: 28130Sstevel@tonic-gate mutex_enter(&ire->ire_lock); 28140Sstevel@tonic-gate v6gw = ire->ire_gateway_addr_v6; 28150Sstevel@tonic-gate mutex_exit(&ire->ire_lock); 28160Sstevel@tonic-gate gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0, 28170Sstevel@tonic-gate IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0, 28183448Sdh155122 NULL, match_flags, ipst); 28190Sstevel@tonic-gate 28200Sstevel@tonic-gate if (gw_ire != NULL) { 28210Sstevel@tonic-gate if (save_ire != NULL) { 28220Sstevel@tonic-gate ire_refrele(save_ire); 28230Sstevel@tonic-gate } 28240Sstevel@tonic-gate IRE_REFHOLD(ire); 28250Sstevel@tonic-gate if (gw_ire->ire_ipif == ipif) { 28260Sstevel@tonic-gate ire_refrele(gw_ire); 28270Sstevel@tonic-gate 28280Sstevel@tonic-gate IRB_REFRELE(irb); 28290Sstevel@tonic-gate return (ire); 28300Sstevel@tonic-gate } 28310Sstevel@tonic-gate ire_refrele(gw_ire); 28320Sstevel@tonic-gate save_ire = ire; 28330Sstevel@tonic-gate } 28340Sstevel@tonic-gate break; 28350Sstevel@tonic-gate case IRE_IF_NORESOLVER: 28360Sstevel@tonic-gate case IRE_IF_RESOLVER: 28370Sstevel@tonic-gate if (ire->ire_ipif == ipif) { 28380Sstevel@tonic-gate if (save_ire != NULL) { 28390Sstevel@tonic-gate ire_refrele(save_ire); 28400Sstevel@tonic-gate } 28410Sstevel@tonic-gate IRE_REFHOLD(ire); 28420Sstevel@tonic-gate 28430Sstevel@tonic-gate IRB_REFRELE(irb); 28440Sstevel@tonic-gate return (ire); 28450Sstevel@tonic-gate } 28460Sstevel@tonic-gate break; 28470Sstevel@tonic-gate } 28480Sstevel@tonic-gate } 28490Sstevel@tonic-gate IRB_REFRELE(irb); 28500Sstevel@tonic-gate 28510Sstevel@tonic-gate return (save_ire); 28520Sstevel@tonic-gate } 2853