10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 51676Sjpk * Common Development and Distribution License (the "License"). 61676Sjpk * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 22*8485SPeter.Memishian@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate /* 260Sstevel@tonic-gate * Copyright (c) 1990 Mentat Inc. 270Sstevel@tonic-gate */ 280Sstevel@tonic-gate 290Sstevel@tonic-gate /* 300Sstevel@tonic-gate * This file contains routines that manipulate Internet Routing Entries (IREs). 
310Sstevel@tonic-gate */ 320Sstevel@tonic-gate #include <sys/types.h> 330Sstevel@tonic-gate #include <sys/stream.h> 340Sstevel@tonic-gate #include <sys/stropts.h> 350Sstevel@tonic-gate #include <sys/ddi.h> 360Sstevel@tonic-gate #include <sys/cmn_err.h> 370Sstevel@tonic-gate 380Sstevel@tonic-gate #include <sys/systm.h> 390Sstevel@tonic-gate #include <sys/param.h> 400Sstevel@tonic-gate #include <sys/socket.h> 410Sstevel@tonic-gate #include <net/if.h> 420Sstevel@tonic-gate #include <net/route.h> 430Sstevel@tonic-gate #include <netinet/in.h> 440Sstevel@tonic-gate #include <net/if_dl.h> 450Sstevel@tonic-gate #include <netinet/ip6.h> 460Sstevel@tonic-gate #include <netinet/icmp6.h> 470Sstevel@tonic-gate 480Sstevel@tonic-gate #include <inet/common.h> 490Sstevel@tonic-gate #include <inet/mi.h> 500Sstevel@tonic-gate #include <inet/ip.h> 510Sstevel@tonic-gate #include <inet/ip6.h> 520Sstevel@tonic-gate #include <inet/ip_ndp.h> 530Sstevel@tonic-gate #include <inet/ip_if.h> 540Sstevel@tonic-gate #include <inet/ip_ire.h> 550Sstevel@tonic-gate #include <inet/ipclassifier.h> 560Sstevel@tonic-gate #include <inet/nd.h> 570Sstevel@tonic-gate #include <sys/kmem.h> 580Sstevel@tonic-gate #include <sys/zone.h> 590Sstevel@tonic-gate 601676Sjpk #include <sys/tsol/label.h> 611676Sjpk #include <sys/tsol/tnet.h> 621676Sjpk 630Sstevel@tonic-gate static ire_t ire_null; 640Sstevel@tonic-gate 650Sstevel@tonic-gate static ire_t *ire_ihandle_lookup_onlink_v6(ire_t *cire); 660Sstevel@tonic-gate static boolean_t ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, 671676Sjpk const in6_addr_t *mask, const in6_addr_t *gateway, int type, 681676Sjpk const ipif_t *ipif, zoneid_t zoneid, uint32_t ihandle, 691676Sjpk const ts_label_t *tsl, int match_flags); 704714Ssowmini static ire_t *ire_init_v6(ire_t *, const in6_addr_t *, const in6_addr_t *, 714714Ssowmini const in6_addr_t *, const in6_addr_t *, uint_t *, queue_t *, queue_t *, 724714Ssowmini ushort_t, ipif_t *, const in6_addr_t *, uint32_t, uint32_t, 
uint_t, 734714Ssowmini const iulp_t *, tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *); 747880SJonathan.Anderson@Sun.COM static ire_t *ip6_ctable_lookup_impl(ire_ctable_args_t *); 750Sstevel@tonic-gate 760Sstevel@tonic-gate /* 770Sstevel@tonic-gate * Initialize the ire that is specific to IPv6 part and call 780Sstevel@tonic-gate * ire_init_common to finish it. 790Sstevel@tonic-gate */ 804714Ssowmini static ire_t * 814714Ssowmini ire_init_v6(ire_t *ire, const in6_addr_t *v6addr, const in6_addr_t *v6mask, 824714Ssowmini const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 834714Ssowmini uint_t *max_fragp, queue_t *rfq, queue_t *stq, ushort_t type, 844714Ssowmini ipif_t *ipif, const in6_addr_t *v6cmask, uint32_t phandle, 854714Ssowmini uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, tsol_gc_t *gc, 864714Ssowmini tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) 870Sstevel@tonic-gate { 882535Ssangeeta 891676Sjpk /* 901676Sjpk * Reject IRE security attribute creation/initialization 911676Sjpk * if system is not running in Trusted mode. 
921676Sjpk */ 931676Sjpk if ((gc != NULL || gcgrp != NULL) && !is_system_labeled()) 941676Sjpk return (NULL); 951676Sjpk 960Sstevel@tonic-gate 973448Sdh155122 BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_alloced); 980Sstevel@tonic-gate ire->ire_addr_v6 = *v6addr; 990Sstevel@tonic-gate 1000Sstevel@tonic-gate if (v6src_addr != NULL) 1010Sstevel@tonic-gate ire->ire_src_addr_v6 = *v6src_addr; 1020Sstevel@tonic-gate if (v6mask != NULL) { 1030Sstevel@tonic-gate ire->ire_mask_v6 = *v6mask; 1040Sstevel@tonic-gate ire->ire_masklen = ip_mask_to_plen_v6(&ire->ire_mask_v6); 1050Sstevel@tonic-gate } 1060Sstevel@tonic-gate if (v6gateway != NULL) 1070Sstevel@tonic-gate ire->ire_gateway_addr_v6 = *v6gateway; 1080Sstevel@tonic-gate 1090Sstevel@tonic-gate if (type == IRE_CACHE && v6cmask != NULL) 1100Sstevel@tonic-gate ire->ire_cmask_v6 = *v6cmask; 1110Sstevel@tonic-gate 1120Sstevel@tonic-gate /* 1130Sstevel@tonic-gate * Multirouted packets need to have a fragment header added so that 1140Sstevel@tonic-gate * the receiver is able to discard duplicates according to their 1150Sstevel@tonic-gate * fragment identifier. 1160Sstevel@tonic-gate */ 1170Sstevel@tonic-gate if (type == IRE_CACHE && (flags & RTF_MULTIRT)) { 1180Sstevel@tonic-gate ire->ire_frag_flag = IPH_FRAG_HDR; 1190Sstevel@tonic-gate } 1200Sstevel@tonic-gate 1211676Sjpk /* ire_init_common will free the mblks upon encountering any failure */ 1224823Sseb if (!ire_init_common(ire, max_fragp, NULL, rfq, stq, type, ipif, 1234823Sseb phandle, ihandle, flags, IPV6_VERSION, ulp_info, gc, gcgrp, ipst)) 1241676Sjpk return (NULL); 1250Sstevel@tonic-gate 1260Sstevel@tonic-gate return (ire); 1270Sstevel@tonic-gate } 1280Sstevel@tonic-gate 1290Sstevel@tonic-gate /* 1300Sstevel@tonic-gate * Similar to ire_create_v6 except that it is called only when 1310Sstevel@tonic-gate * we want to allocate ire as an mblk e.g. we have a external 1320Sstevel@tonic-gate * resolver. Do we need this in IPv6 ? 
1334714Ssowmini * 1344714Ssowmini * IPv6 initializes the ire_nce in ire_add_v6, which expects to 1354714Ssowmini * find the ire_nce to be null when it is called. So, although 1364714Ssowmini * we have a src_nce parameter (in the interest of matching up with 1374714Ssowmini * the argument list of the v4 version), we ignore the src_nce 1384714Ssowmini * argument here. 1390Sstevel@tonic-gate */ 1404714Ssowmini /* ARGSUSED */ 1410Sstevel@tonic-gate ire_t * 1420Sstevel@tonic-gate ire_create_mp_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, 1430Sstevel@tonic-gate const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 1444714Ssowmini nce_t *src_nce, queue_t *rfq, queue_t *stq, ushort_t type, 1454714Ssowmini ipif_t *ipif, const in6_addr_t *v6cmask, 1461676Sjpk uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, 1473448Sdh155122 tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) 1480Sstevel@tonic-gate { 1490Sstevel@tonic-gate ire_t *ire; 1500Sstevel@tonic-gate ire_t *ret_ire; 1510Sstevel@tonic-gate mblk_t *mp; 1520Sstevel@tonic-gate 1530Sstevel@tonic-gate ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr)); 1540Sstevel@tonic-gate 1550Sstevel@tonic-gate /* Allocate the new IRE. */ 1560Sstevel@tonic-gate mp = allocb(sizeof (ire_t), BPRI_MED); 1570Sstevel@tonic-gate if (mp == NULL) { 1580Sstevel@tonic-gate ip1dbg(("ire_create_mp_v6: alloc failed\n")); 1590Sstevel@tonic-gate return (NULL); 1600Sstevel@tonic-gate } 1610Sstevel@tonic-gate 1620Sstevel@tonic-gate ire = (ire_t *)mp->b_rptr; 1630Sstevel@tonic-gate mp->b_wptr = (uchar_t *)&ire[1]; 1640Sstevel@tonic-gate 1650Sstevel@tonic-gate /* Start clean. 
*/ 1660Sstevel@tonic-gate *ire = ire_null; 1670Sstevel@tonic-gate ire->ire_mp = mp; 1680Sstevel@tonic-gate mp->b_datap->db_type = IRE_DB_TYPE; 1690Sstevel@tonic-gate 1700Sstevel@tonic-gate ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway, 1714714Ssowmini NULL, rfq, stq, type, ipif, v6cmask, phandle, 1723448Sdh155122 ihandle, flags, ulp_info, gc, gcgrp, ipst); 1730Sstevel@tonic-gate 1740Sstevel@tonic-gate if (ret_ire == NULL) { 1750Sstevel@tonic-gate freeb(ire->ire_mp); 1760Sstevel@tonic-gate return (NULL); 1770Sstevel@tonic-gate } 1780Sstevel@tonic-gate return (ire); 1790Sstevel@tonic-gate } 1800Sstevel@tonic-gate 1810Sstevel@tonic-gate /* 1820Sstevel@tonic-gate * ire_create_v6 is called to allocate and initialize a new IRE. 1830Sstevel@tonic-gate * 1840Sstevel@tonic-gate * NOTE : This is called as writer sometimes though not required 1850Sstevel@tonic-gate * by this function. 1864714Ssowmini * 1874714Ssowmini * See comments above ire_create_mp_v6() for the rationale behind the 1884714Ssowmini * unused src_nce argument. 
1890Sstevel@tonic-gate */ 1904714Ssowmini /* ARGSUSED */ 1910Sstevel@tonic-gate ire_t * 1920Sstevel@tonic-gate ire_create_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, 1930Sstevel@tonic-gate const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 1944714Ssowmini uint_t *max_fragp, nce_t *src_nce, queue_t *rfq, queue_t *stq, 1954714Ssowmini ushort_t type, ipif_t *ipif, const in6_addr_t *v6cmask, 1961676Sjpk uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, 1973448Sdh155122 tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) 1980Sstevel@tonic-gate { 1990Sstevel@tonic-gate ire_t *ire; 2000Sstevel@tonic-gate ire_t *ret_ire; 2010Sstevel@tonic-gate 2020Sstevel@tonic-gate ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr)); 2030Sstevel@tonic-gate 2040Sstevel@tonic-gate ire = kmem_cache_alloc(ire_cache, KM_NOSLEEP); 2050Sstevel@tonic-gate if (ire == NULL) { 2060Sstevel@tonic-gate ip1dbg(("ire_create_v6: alloc failed\n")); 2070Sstevel@tonic-gate return (NULL); 2080Sstevel@tonic-gate } 2090Sstevel@tonic-gate *ire = ire_null; 2100Sstevel@tonic-gate 2110Sstevel@tonic-gate ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway, 2124714Ssowmini max_fragp, rfq, stq, type, ipif, v6cmask, phandle, 2133448Sdh155122 ihandle, flags, ulp_info, gc, gcgrp, ipst); 2140Sstevel@tonic-gate 2150Sstevel@tonic-gate if (ret_ire == NULL) { 2160Sstevel@tonic-gate kmem_cache_free(ire_cache, ire); 2170Sstevel@tonic-gate return (NULL); 2180Sstevel@tonic-gate } 2190Sstevel@tonic-gate ASSERT(ret_ire == ire); 2200Sstevel@tonic-gate return (ire); 2210Sstevel@tonic-gate } 2220Sstevel@tonic-gate 2230Sstevel@tonic-gate /* 2240Sstevel@tonic-gate * Find an IRE_INTERFACE for the multicast group. 2250Sstevel@tonic-gate * Allows different routes for multicast addresses 2260Sstevel@tonic-gate * in the unicast routing table (akin to FF::0/8 but could be more specific) 2270Sstevel@tonic-gate * which point at different interfaces. 
This is used when IPV6_MULTICAST_IF 2280Sstevel@tonic-gate * isn't specified (when sending) and when IPV6_JOIN_GROUP doesn't 2290Sstevel@tonic-gate * specify the interface to join on. 2300Sstevel@tonic-gate * 2310Sstevel@tonic-gate * Supports link-local addresses by following the ipif/ill when recursing. 2320Sstevel@tonic-gate */ 2330Sstevel@tonic-gate ire_t * 2343448Sdh155122 ire_lookup_multi_v6(const in6_addr_t *group, zoneid_t zoneid, ip_stack_t *ipst) 2350Sstevel@tonic-gate { 2360Sstevel@tonic-gate ire_t *ire; 2370Sstevel@tonic-gate ipif_t *ipif = NULL; 2380Sstevel@tonic-gate int match_flags = MATCH_IRE_TYPE; 2390Sstevel@tonic-gate in6_addr_t gw_addr_v6; 2400Sstevel@tonic-gate 2410Sstevel@tonic-gate ire = ire_ftable_lookup_v6(group, 0, 0, 0, NULL, NULL, 2423448Sdh155122 zoneid, 0, NULL, MATCH_IRE_DEFAULT, ipst); 2430Sstevel@tonic-gate 2440Sstevel@tonic-gate /* We search a resolvable ire in case of multirouting. */ 2450Sstevel@tonic-gate if ((ire != NULL) && (ire->ire_flags & RTF_MULTIRT)) { 2460Sstevel@tonic-gate ire_t *cire = NULL; 2470Sstevel@tonic-gate /* 2480Sstevel@tonic-gate * If the route is not resolvable, the looked up ire 2490Sstevel@tonic-gate * may be changed here. In that case, ire_multirt_lookup() 2500Sstevel@tonic-gate * IRE_REFRELE the original ire and change it. 2510Sstevel@tonic-gate */ 2521676Sjpk (void) ire_multirt_lookup_v6(&cire, &ire, MULTIRT_CACHEGW, 2533448Sdh155122 NULL, ipst); 2540Sstevel@tonic-gate if (cire != NULL) 2550Sstevel@tonic-gate ire_refrele(cire); 2560Sstevel@tonic-gate } 2570Sstevel@tonic-gate if (ire == NULL) 2580Sstevel@tonic-gate return (NULL); 2590Sstevel@tonic-gate /* 2600Sstevel@tonic-gate * Make sure we follow ire_ipif. 2610Sstevel@tonic-gate * 2620Sstevel@tonic-gate * We need to determine the interface route through 263*8485SPeter.Memishian@Sun.COM * which the gateway will be reached. 
2640Sstevel@tonic-gate */ 2650Sstevel@tonic-gate if (ire->ire_ipif != NULL) { 2660Sstevel@tonic-gate ipif = ire->ire_ipif; 267*8485SPeter.Memishian@Sun.COM match_flags |= MATCH_IRE_ILL; 2680Sstevel@tonic-gate } 2690Sstevel@tonic-gate 2700Sstevel@tonic-gate switch (ire->ire_type) { 2710Sstevel@tonic-gate case IRE_DEFAULT: 2720Sstevel@tonic-gate case IRE_PREFIX: 2730Sstevel@tonic-gate case IRE_HOST: 2740Sstevel@tonic-gate mutex_enter(&ire->ire_lock); 2750Sstevel@tonic-gate gw_addr_v6 = ire->ire_gateway_addr_v6; 2760Sstevel@tonic-gate mutex_exit(&ire->ire_lock); 2770Sstevel@tonic-gate ire_refrele(ire); 2780Sstevel@tonic-gate ire = ire_ftable_lookup_v6(&gw_addr_v6, 0, 0, 2790Sstevel@tonic-gate IRE_INTERFACE, ipif, NULL, zoneid, 0, 2803448Sdh155122 NULL, match_flags, ipst); 2810Sstevel@tonic-gate return (ire); 2820Sstevel@tonic-gate case IRE_IF_NORESOLVER: 2830Sstevel@tonic-gate case IRE_IF_RESOLVER: 2840Sstevel@tonic-gate return (ire); 2850Sstevel@tonic-gate default: 2860Sstevel@tonic-gate ire_refrele(ire); 2870Sstevel@tonic-gate return (NULL); 2880Sstevel@tonic-gate } 2890Sstevel@tonic-gate } 2900Sstevel@tonic-gate 2910Sstevel@tonic-gate /* 2920Sstevel@tonic-gate * Return any local address. We use this to target ourselves 2930Sstevel@tonic-gate * when the src address was specified as 'default'. 2940Sstevel@tonic-gate * Preference for IRE_LOCAL entries. 
2950Sstevel@tonic-gate */ 2960Sstevel@tonic-gate ire_t * 2973448Sdh155122 ire_lookup_local_v6(zoneid_t zoneid, ip_stack_t *ipst) 2980Sstevel@tonic-gate { 2990Sstevel@tonic-gate ire_t *ire; 3000Sstevel@tonic-gate irb_t *irb; 3010Sstevel@tonic-gate ire_t *maybe = NULL; 3020Sstevel@tonic-gate int i; 3030Sstevel@tonic-gate 3043448Sdh155122 for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { 3053448Sdh155122 irb = &ipst->ips_ip_cache_table_v6[i]; 3060Sstevel@tonic-gate if (irb->irb_ire == NULL) 3070Sstevel@tonic-gate continue; 3080Sstevel@tonic-gate rw_enter(&irb->irb_lock, RW_READER); 3090Sstevel@tonic-gate for (ire = irb->irb_ire; ire; ire = ire->ire_next) { 3100Sstevel@tonic-gate if ((ire->ire_marks & IRE_MARK_CONDEMNED) || 3111676Sjpk ire->ire_zoneid != zoneid && 3121676Sjpk ire->ire_zoneid != ALL_ZONES) 3130Sstevel@tonic-gate continue; 3140Sstevel@tonic-gate switch (ire->ire_type) { 3150Sstevel@tonic-gate case IRE_LOOPBACK: 3160Sstevel@tonic-gate if (maybe == NULL) { 3170Sstevel@tonic-gate IRE_REFHOLD(ire); 3180Sstevel@tonic-gate maybe = ire; 3190Sstevel@tonic-gate } 3200Sstevel@tonic-gate break; 3210Sstevel@tonic-gate case IRE_LOCAL: 3220Sstevel@tonic-gate if (maybe != NULL) { 3230Sstevel@tonic-gate ire_refrele(maybe); 3240Sstevel@tonic-gate } 3250Sstevel@tonic-gate IRE_REFHOLD(ire); 3260Sstevel@tonic-gate rw_exit(&irb->irb_lock); 3270Sstevel@tonic-gate return (ire); 3280Sstevel@tonic-gate } 3290Sstevel@tonic-gate } 3300Sstevel@tonic-gate rw_exit(&irb->irb_lock); 3310Sstevel@tonic-gate } 3320Sstevel@tonic-gate return (maybe); 3330Sstevel@tonic-gate } 3340Sstevel@tonic-gate 3350Sstevel@tonic-gate /* 3360Sstevel@tonic-gate * This function takes a mask and returns number of bits set in the 3370Sstevel@tonic-gate * mask (the represented prefix length). Assumes a contiguous mask. 
3380Sstevel@tonic-gate */ 3390Sstevel@tonic-gate int 3400Sstevel@tonic-gate ip_mask_to_plen_v6(const in6_addr_t *v6mask) 3410Sstevel@tonic-gate { 3420Sstevel@tonic-gate int bits; 3430Sstevel@tonic-gate int plen = IPV6_ABITS; 3440Sstevel@tonic-gate int i; 3450Sstevel@tonic-gate 3460Sstevel@tonic-gate for (i = 3; i >= 0; i--) { 3470Sstevel@tonic-gate if (v6mask->s6_addr32[i] == 0) { 3480Sstevel@tonic-gate plen -= 32; 3490Sstevel@tonic-gate continue; 3500Sstevel@tonic-gate } 3510Sstevel@tonic-gate bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1; 3520Sstevel@tonic-gate if (bits == 0) 3530Sstevel@tonic-gate break; 3540Sstevel@tonic-gate plen -= bits; 3550Sstevel@tonic-gate } 3560Sstevel@tonic-gate 3570Sstevel@tonic-gate return (plen); 3580Sstevel@tonic-gate } 3590Sstevel@tonic-gate 3600Sstevel@tonic-gate /* 3610Sstevel@tonic-gate * Convert a prefix length to the mask for that prefix. 3620Sstevel@tonic-gate * Returns the argument bitmask. 3630Sstevel@tonic-gate */ 3640Sstevel@tonic-gate in6_addr_t * 3650Sstevel@tonic-gate ip_plen_to_mask_v6(uint_t plen, in6_addr_t *bitmask) 3660Sstevel@tonic-gate { 3670Sstevel@tonic-gate uint32_t *ptr; 3680Sstevel@tonic-gate 3690Sstevel@tonic-gate if (plen < 0 || plen > IPV6_ABITS) 3700Sstevel@tonic-gate return (NULL); 3710Sstevel@tonic-gate *bitmask = ipv6_all_zeros; 3720Sstevel@tonic-gate 3730Sstevel@tonic-gate ptr = (uint32_t *)bitmask; 3740Sstevel@tonic-gate while (plen > 32) { 3750Sstevel@tonic-gate *ptr++ = 0xffffffffU; 3760Sstevel@tonic-gate plen -= 32; 3770Sstevel@tonic-gate } 3780Sstevel@tonic-gate *ptr = htonl(0xffffffffU << (32 - plen)); 3790Sstevel@tonic-gate return (bitmask); 3800Sstevel@tonic-gate } 3810Sstevel@tonic-gate 3820Sstevel@tonic-gate /* 3830Sstevel@tonic-gate * Add a fully initialized IRE to an appropriate 3840Sstevel@tonic-gate * table based on ire_type. 
3850Sstevel@tonic-gate * 3863004Sdd193516 * The forward table contains IRE_PREFIX/IRE_HOST/IRE_HOST and 3870Sstevel@tonic-gate * IRE_IF_RESOLVER/IRE_IF_NORESOLVER and IRE_DEFAULT. 3880Sstevel@tonic-gate * 3890Sstevel@tonic-gate * The cache table contains IRE_BROADCAST/IRE_LOCAL/IRE_LOOPBACK 3900Sstevel@tonic-gate * and IRE_CACHE. 3910Sstevel@tonic-gate * 3920Sstevel@tonic-gate * NOTE : This function is called as writer though not required 3930Sstevel@tonic-gate * by this function. 3940Sstevel@tonic-gate */ 3950Sstevel@tonic-gate int 3960Sstevel@tonic-gate ire_add_v6(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func) 3970Sstevel@tonic-gate { 3980Sstevel@tonic-gate ire_t *ire1; 3990Sstevel@tonic-gate int mask_table_index; 4000Sstevel@tonic-gate irb_t *irb_ptr; 4010Sstevel@tonic-gate ire_t **irep; 4020Sstevel@tonic-gate int flags; 4030Sstevel@tonic-gate ire_t *pire = NULL; 4040Sstevel@tonic-gate ill_t *stq_ill; 4050Sstevel@tonic-gate boolean_t ndp_g_lock_held = B_FALSE; 4060Sstevel@tonic-gate ire_t *ire = *ire_p; 4070Sstevel@tonic-gate int error; 4083448Sdh155122 ip_stack_t *ipst = ire->ire_ipst; 409*8485SPeter.Memishian@Sun.COM uint_t marks = 0; 4100Sstevel@tonic-gate 4110Sstevel@tonic-gate ASSERT(ire->ire_ipversion == IPV6_VERSION); 4120Sstevel@tonic-gate ASSERT(ire->ire_mp == NULL); /* Calls should go through ire_add */ 4130Sstevel@tonic-gate ASSERT(ire->ire_nce == NULL); 4140Sstevel@tonic-gate 415*8485SPeter.Memishian@Sun.COM /* 416*8485SPeter.Memishian@Sun.COM * IREs with source addresses hosted on interfaces that are under IPMP 417*8485SPeter.Memishian@Sun.COM * should be hidden so that applications don't accidentally end up 418*8485SPeter.Memishian@Sun.COM * sending packets with test addresses as their source addresses, or 419*8485SPeter.Memishian@Sun.COM * sending out interfaces that are e.g. IFF_INACTIVE. Hide them here. 
420*8485SPeter.Memishian@Sun.COM * (We let IREs with unspecified source addresses slip through since 421*8485SPeter.Memishian@Sun.COM * ire_send_v6() will delete them automatically.) 422*8485SPeter.Memishian@Sun.COM */ 423*8485SPeter.Memishian@Sun.COM if (ire->ire_ipif != NULL && IS_UNDER_IPMP(ire->ire_ipif->ipif_ill) && 424*8485SPeter.Memishian@Sun.COM !IN6_IS_ADDR_UNSPECIFIED(&ire->ire_src_addr_v6)) { 425*8485SPeter.Memishian@Sun.COM DTRACE_PROBE1(ipmp__mark__testhidden, ire_t *, ire); 426*8485SPeter.Memishian@Sun.COM marks |= IRE_MARK_TESTHIDDEN; 427*8485SPeter.Memishian@Sun.COM } 428*8485SPeter.Memishian@Sun.COM 4290Sstevel@tonic-gate /* Find the appropriate list head. */ 4300Sstevel@tonic-gate switch (ire->ire_type) { 4310Sstevel@tonic-gate case IRE_HOST: 4320Sstevel@tonic-gate ire->ire_mask_v6 = ipv6_all_ones; 4330Sstevel@tonic-gate ire->ire_masklen = IPV6_ABITS; 434*8485SPeter.Memishian@Sun.COM ire->ire_marks |= marks; 4350Sstevel@tonic-gate if ((ire->ire_flags & RTF_SETSRC) == 0) 4360Sstevel@tonic-gate ire->ire_src_addr_v6 = ipv6_all_zeros; 4370Sstevel@tonic-gate break; 4380Sstevel@tonic-gate case IRE_CACHE: 439*8485SPeter.Memishian@Sun.COM ire->ire_mask_v6 = ipv6_all_ones; 440*8485SPeter.Memishian@Sun.COM ire->ire_masklen = IPV6_ABITS; 441*8485SPeter.Memishian@Sun.COM ire->ire_marks |= marks; 442*8485SPeter.Memishian@Sun.COM break; 4430Sstevel@tonic-gate case IRE_LOCAL: 4440Sstevel@tonic-gate case IRE_LOOPBACK: 4450Sstevel@tonic-gate ire->ire_mask_v6 = ipv6_all_ones; 4460Sstevel@tonic-gate ire->ire_masklen = IPV6_ABITS; 4470Sstevel@tonic-gate break; 4480Sstevel@tonic-gate case IRE_PREFIX: 4490Sstevel@tonic-gate case IRE_DEFAULT: 450*8485SPeter.Memishian@Sun.COM ire->ire_marks |= marks; 4510Sstevel@tonic-gate if ((ire->ire_flags & RTF_SETSRC) == 0) 4520Sstevel@tonic-gate ire->ire_src_addr_v6 = ipv6_all_zeros; 4530Sstevel@tonic-gate break; 4540Sstevel@tonic-gate case IRE_IF_RESOLVER: 4550Sstevel@tonic-gate case IRE_IF_NORESOLVER: 
456*8485SPeter.Memishian@Sun.COM ire->ire_marks |= marks; 4570Sstevel@tonic-gate break; 4580Sstevel@tonic-gate default: 4590Sstevel@tonic-gate printf("ire_add_v6: ire %p has unrecognized IRE type (%d)\n", 4600Sstevel@tonic-gate (void *)ire, ire->ire_type); 4610Sstevel@tonic-gate ire_delete(ire); 4620Sstevel@tonic-gate *ire_p = NULL; 4630Sstevel@tonic-gate return (EINVAL); 4640Sstevel@tonic-gate } 4650Sstevel@tonic-gate 4660Sstevel@tonic-gate /* Make sure the address is properly masked. */ 4670Sstevel@tonic-gate V6_MASK_COPY(ire->ire_addr_v6, ire->ire_mask_v6, ire->ire_addr_v6); 4680Sstevel@tonic-gate 4690Sstevel@tonic-gate if ((ire->ire_type & IRE_CACHETABLE) == 0) { 4700Sstevel@tonic-gate /* IRE goes into Forward Table */ 4710Sstevel@tonic-gate mask_table_index = ip_mask_to_plen_v6(&ire->ire_mask_v6); 4723448Sdh155122 if ((ipst->ips_ip_forwarding_table_v6[mask_table_index]) == 4733448Sdh155122 NULL) { 4740Sstevel@tonic-gate irb_t *ptr; 4750Sstevel@tonic-gate int i; 4760Sstevel@tonic-gate 4773448Sdh155122 ptr = (irb_t *)mi_zalloc(( 4783448Sdh155122 ipst->ips_ip6_ftable_hash_size * sizeof (irb_t))); 4790Sstevel@tonic-gate if (ptr == NULL) { 4800Sstevel@tonic-gate ire_delete(ire); 4810Sstevel@tonic-gate *ire_p = NULL; 4820Sstevel@tonic-gate return (ENOMEM); 4830Sstevel@tonic-gate } 4843448Sdh155122 for (i = 0; i < ipst->ips_ip6_ftable_hash_size; i++) { 4850Sstevel@tonic-gate rw_init(&ptr[i].irb_lock, NULL, 4860Sstevel@tonic-gate RW_DEFAULT, NULL); 4870Sstevel@tonic-gate } 4883448Sdh155122 mutex_enter(&ipst->ips_ire_ft_init_lock); 4893448Sdh155122 if (ipst->ips_ip_forwarding_table_v6[ 4903448Sdh155122 mask_table_index] == NULL) { 4913448Sdh155122 ipst->ips_ip_forwarding_table_v6[ 4923448Sdh155122 mask_table_index] = ptr; 4933448Sdh155122 mutex_exit(&ipst->ips_ire_ft_init_lock); 4940Sstevel@tonic-gate } else { 4950Sstevel@tonic-gate /* 4960Sstevel@tonic-gate * Some other thread won the race in 4970Sstevel@tonic-gate * initializing the forwarding table at the 
4980Sstevel@tonic-gate * same index. 4990Sstevel@tonic-gate */ 5003448Sdh155122 mutex_exit(&ipst->ips_ire_ft_init_lock); 5013448Sdh155122 for (i = 0; i < ipst->ips_ip6_ftable_hash_size; 5023448Sdh155122 i++) { 5030Sstevel@tonic-gate rw_destroy(&ptr[i].irb_lock); 5040Sstevel@tonic-gate } 5050Sstevel@tonic-gate mi_free(ptr); 5060Sstevel@tonic-gate } 5070Sstevel@tonic-gate } 5083448Sdh155122 irb_ptr = &(ipst->ips_ip_forwarding_table_v6[mask_table_index][ 5090Sstevel@tonic-gate IRE_ADDR_MASK_HASH_V6(ire->ire_addr_v6, ire->ire_mask_v6, 5103448Sdh155122 ipst->ips_ip6_ftable_hash_size)]); 5110Sstevel@tonic-gate } else { 5123448Sdh155122 irb_ptr = &(ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6( 5133448Sdh155122 ire->ire_addr_v6, ipst->ips_ip6_cache_table_size)]); 5140Sstevel@tonic-gate } 5150Sstevel@tonic-gate /* 5160Sstevel@tonic-gate * For xresolv interfaces (v6 interfaces with an external 5170Sstevel@tonic-gate * address resolver), ip_newroute_v6/ip_newroute_ipif_v6 5180Sstevel@tonic-gate * are unable to prevent the deletion of the interface route 5190Sstevel@tonic-gate * while adding an IRE_CACHE for an on-link destination 5200Sstevel@tonic-gate * in the IRE_IF_RESOLVER case, since the ire has to go to 5210Sstevel@tonic-gate * the external resolver and return. We can't do a REFHOLD on the 5220Sstevel@tonic-gate * associated interface ire for fear of the message being freed 5230Sstevel@tonic-gate * if the external resolver can't resolve the address. 5240Sstevel@tonic-gate * Here we look up the interface ire in the forwarding table 5250Sstevel@tonic-gate * and make sure that the interface route has not been deleted. 
5260Sstevel@tonic-gate */ 5270Sstevel@tonic-gate if (ire->ire_type == IRE_CACHE && 5280Sstevel@tonic-gate IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6) && 5290Sstevel@tonic-gate (((ill_t *)ire->ire_stq->q_ptr)->ill_net_type == IRE_IF_RESOLVER) && 5300Sstevel@tonic-gate (((ill_t *)ire->ire_stq->q_ptr)->ill_flags & ILLF_XRESOLV)) { 5310Sstevel@tonic-gate 5320Sstevel@tonic-gate pire = ire_ihandle_lookup_onlink_v6(ire); 5330Sstevel@tonic-gate if (pire == NULL) { 5340Sstevel@tonic-gate ire_delete(ire); 5350Sstevel@tonic-gate *ire_p = NULL; 5360Sstevel@tonic-gate return (EINVAL); 5370Sstevel@tonic-gate } 5380Sstevel@tonic-gate /* Prevent pire from getting deleted */ 5390Sstevel@tonic-gate IRB_REFHOLD(pire->ire_bucket); 5400Sstevel@tonic-gate /* Has it been removed already? */ 5410Sstevel@tonic-gate if (pire->ire_marks & IRE_MARK_CONDEMNED) { 5420Sstevel@tonic-gate IRB_REFRELE(pire->ire_bucket); 5430Sstevel@tonic-gate ire_refrele(pire); 5440Sstevel@tonic-gate ire_delete(ire); 5450Sstevel@tonic-gate *ire_p = NULL; 5460Sstevel@tonic-gate return (EINVAL); 5470Sstevel@tonic-gate } 5480Sstevel@tonic-gate } 5490Sstevel@tonic-gate 5500Sstevel@tonic-gate flags = (MATCH_IRE_MASK | MATCH_IRE_TYPE | MATCH_IRE_GW); 5510Sstevel@tonic-gate /* 5520Sstevel@tonic-gate * For IRE_CACHES, MATCH_IRE_IPIF is not enough to check 5530Sstevel@tonic-gate * for duplicates because : 5540Sstevel@tonic-gate * 5550Sstevel@tonic-gate * 1) ire_ipif->ipif_ill and ire_stq->q_ptr could be 5560Sstevel@tonic-gate * pointing at different ills. A real duplicate is 5570Sstevel@tonic-gate * a match on both ire_ipif and ire_stq. 5580Sstevel@tonic-gate * 5590Sstevel@tonic-gate * 2) We could have multiple packets trying to create 5600Sstevel@tonic-gate * an IRE_CACHE for the same ill. 5610Sstevel@tonic-gate * 562*8485SPeter.Memishian@Sun.COM * Rather than looking at the packet, we depend on the above for 563*8485SPeter.Memishian@Sun.COM * MATCH_IRE_ILL here. 
5640Sstevel@tonic-gate * 5650Sstevel@tonic-gate * Unlike IPv4, MATCH_IRE_IPIF is needed here as we could have 5660Sstevel@tonic-gate * multiple IRE_CACHES for an ill for the same destination 5670Sstevel@tonic-gate * with various scoped addresses i.e represented by ipifs. 5680Sstevel@tonic-gate * 5690Sstevel@tonic-gate * MATCH_IRE_ILL is done implicitly below for IRE_CACHES. 5700Sstevel@tonic-gate */ 5710Sstevel@tonic-gate if (ire->ire_ipif != NULL) 5720Sstevel@tonic-gate flags |= MATCH_IRE_IPIF; 573*8485SPeter.Memishian@Sun.COM 5740Sstevel@tonic-gate /* 575*8485SPeter.Memishian@Sun.COM * If we are creating a hidden IRE, make sure we search for 576*8485SPeter.Memishian@Sun.COM * hidden IREs when searching for duplicates below. 577*8485SPeter.Memishian@Sun.COM * Otherwise, we might find an IRE on some other interface 578*8485SPeter.Memishian@Sun.COM * that's not marked hidden. 5790Sstevel@tonic-gate */ 580*8485SPeter.Memishian@Sun.COM if (ire->ire_marks & IRE_MARK_TESTHIDDEN) 581*8485SPeter.Memishian@Sun.COM flags |= MATCH_IRE_MARK_TESTHIDDEN; 5820Sstevel@tonic-gate 5830Sstevel@tonic-gate /* 5840Sstevel@tonic-gate * Start the atomic add of the ire. Grab the ill locks, 5850Sstevel@tonic-gate * ill_g_usesrc_lock and the bucket lock. Check for condemned. 5862535Ssangeeta * To avoid lock order problems, get the ndp6.ndp_g_lock now itself. 5870Sstevel@tonic-gate */ 5880Sstevel@tonic-gate if (ire->ire_type == IRE_CACHE) { 5893448Sdh155122 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 5900Sstevel@tonic-gate ndp_g_lock_held = B_TRUE; 5910Sstevel@tonic-gate } 5920Sstevel@tonic-gate 5930Sstevel@tonic-gate /* 5940Sstevel@tonic-gate * If ipif or ill is changing ire_atomic_start() may queue the 5950Sstevel@tonic-gate * request and return EINPROGRESS. 
5960Sstevel@tonic-gate */ 5970Sstevel@tonic-gate 5980Sstevel@tonic-gate error = ire_atomic_start(irb_ptr, ire, q, mp, func); 5990Sstevel@tonic-gate if (error != 0) { 6000Sstevel@tonic-gate if (ndp_g_lock_held) 6013448Sdh155122 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 6020Sstevel@tonic-gate /* 6030Sstevel@tonic-gate * We don't know whether it is a valid ipif or not. 6040Sstevel@tonic-gate * So, set it to NULL. This assumes that the ire has not added 6050Sstevel@tonic-gate * a reference to the ipif. 6060Sstevel@tonic-gate */ 6070Sstevel@tonic-gate ire->ire_ipif = NULL; 6080Sstevel@tonic-gate ire_delete(ire); 6090Sstevel@tonic-gate if (pire != NULL) { 6100Sstevel@tonic-gate IRB_REFRELE(pire->ire_bucket); 6110Sstevel@tonic-gate ire_refrele(pire); 6120Sstevel@tonic-gate } 6130Sstevel@tonic-gate *ire_p = NULL; 6140Sstevel@tonic-gate return (error); 6150Sstevel@tonic-gate } 6160Sstevel@tonic-gate /* 6170Sstevel@tonic-gate * To avoid creating ires having stale values for the ire_max_frag 6180Sstevel@tonic-gate * we get the latest value atomically here. For more details 6190Sstevel@tonic-gate * see the block comment in ip_sioctl_mtu and in DL_NOTE_SDU_CHANGE 6200Sstevel@tonic-gate * in ip_rput_dlpi_writer 6210Sstevel@tonic-gate */ 6220Sstevel@tonic-gate if (ire->ire_max_fragp == NULL) { 6230Sstevel@tonic-gate if (IN6_IS_ADDR_MULTICAST(&ire->ire_addr_v6)) 6240Sstevel@tonic-gate ire->ire_max_frag = ire->ire_ipif->ipif_mtu; 6250Sstevel@tonic-gate else 6260Sstevel@tonic-gate ire->ire_max_frag = pire->ire_max_frag; 6270Sstevel@tonic-gate } else { 6280Sstevel@tonic-gate uint_t max_frag; 6290Sstevel@tonic-gate 6300Sstevel@tonic-gate max_frag = *ire->ire_max_fragp; 6310Sstevel@tonic-gate ire->ire_max_fragp = NULL; 6320Sstevel@tonic-gate ire->ire_max_frag = max_frag; 6330Sstevel@tonic-gate } 6340Sstevel@tonic-gate 6350Sstevel@tonic-gate /* 6360Sstevel@tonic-gate * Atomically check for duplicate and insert in the table. 
6370Sstevel@tonic-gate */ 6380Sstevel@tonic-gate for (ire1 = irb_ptr->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) { 6390Sstevel@tonic-gate if (ire1->ire_marks & IRE_MARK_CONDEMNED) 6400Sstevel@tonic-gate continue; 6410Sstevel@tonic-gate 6420Sstevel@tonic-gate if (ire->ire_type == IRE_CACHE) { 6430Sstevel@tonic-gate /* 6440Sstevel@tonic-gate * We do MATCH_IRE_ILL implicitly here for IRE_CACHES. 6450Sstevel@tonic-gate * As ire_ipif and ire_stq could point to two 6460Sstevel@tonic-gate * different ills, we can't pass just ire_ipif to 6470Sstevel@tonic-gate * ire_match_args and get a match on both ills. 6480Sstevel@tonic-gate * This is just needed for duplicate checks here and 6490Sstevel@tonic-gate * so we don't add an extra argument to 6500Sstevel@tonic-gate * ire_match_args for this. Do it locally. 6510Sstevel@tonic-gate * 6520Sstevel@tonic-gate * NOTE : Currently there is no part of the code 6530Sstevel@tonic-gate * that asks for both MATH_IRE_IPIF and MATCH_IRE_ILL 6540Sstevel@tonic-gate * match for IRE_CACHEs. Thus we don't want to 6550Sstevel@tonic-gate * extend the arguments to ire_match_args_v6. 6560Sstevel@tonic-gate */ 6570Sstevel@tonic-gate if (ire1->ire_stq != ire->ire_stq) 6580Sstevel@tonic-gate continue; 6590Sstevel@tonic-gate /* 6600Sstevel@tonic-gate * Multiroute IRE_CACHEs for a given destination can 6610Sstevel@tonic-gate * have the same ire_ipif, typically if their source 6620Sstevel@tonic-gate * address is forced using RTF_SETSRC, and the same 6630Sstevel@tonic-gate * send-to queue. We differentiate them using the parent 6640Sstevel@tonic-gate * handle. 
6650Sstevel@tonic-gate */ 6660Sstevel@tonic-gate if ((ire1->ire_flags & RTF_MULTIRT) && 6670Sstevel@tonic-gate (ire->ire_flags & RTF_MULTIRT) && 6680Sstevel@tonic-gate (ire1->ire_phandle != ire->ire_phandle)) 6690Sstevel@tonic-gate continue; 6700Sstevel@tonic-gate } 6710Sstevel@tonic-gate if (ire1->ire_zoneid != ire->ire_zoneid) 6720Sstevel@tonic-gate continue; 6730Sstevel@tonic-gate if (ire_match_args_v6(ire1, &ire->ire_addr_v6, 6740Sstevel@tonic-gate &ire->ire_mask_v6, &ire->ire_gateway_addr_v6, 6751676Sjpk ire->ire_type, ire->ire_ipif, ire->ire_zoneid, 0, NULL, 6761676Sjpk flags)) { 6770Sstevel@tonic-gate /* 6780Sstevel@tonic-gate * Return the old ire after doing a REFHOLD. 6790Sstevel@tonic-gate * As most of the callers continue to use the IRE 6800Sstevel@tonic-gate * after adding, we return a held ire. This will 6810Sstevel@tonic-gate * avoid a lookup in the caller again. If the callers 6820Sstevel@tonic-gate * don't want to use it, they need to do a REFRELE. 6830Sstevel@tonic-gate */ 6840Sstevel@tonic-gate ip1dbg(("found dup ire existing %p new %p", 6850Sstevel@tonic-gate (void *)ire1, (void *)ire)); 6860Sstevel@tonic-gate IRE_REFHOLD(ire1); 6870Sstevel@tonic-gate if (ndp_g_lock_held) 6883448Sdh155122 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 6890Sstevel@tonic-gate ire_atomic_end(irb_ptr, ire); 6900Sstevel@tonic-gate ire_delete(ire); 6910Sstevel@tonic-gate if (pire != NULL) { 6920Sstevel@tonic-gate /* 6930Sstevel@tonic-gate * Assert that it is 6940Sstevel@tonic-gate * not yet removed from the list. 
6950Sstevel@tonic-gate */ 6960Sstevel@tonic-gate ASSERT(pire->ire_ptpn != NULL); 6970Sstevel@tonic-gate IRB_REFRELE(pire->ire_bucket); 6980Sstevel@tonic-gate ire_refrele(pire); 6990Sstevel@tonic-gate } 7000Sstevel@tonic-gate *ire_p = ire1; 7010Sstevel@tonic-gate return (0); 7020Sstevel@tonic-gate } 7030Sstevel@tonic-gate } 7040Sstevel@tonic-gate if (ire->ire_type == IRE_CACHE) { 705*8485SPeter.Memishian@Sun.COM const in6_addr_t *addr_v6; 7060Sstevel@tonic-gate ill_t *ill = ire_to_ill(ire); 7070Sstevel@tonic-gate char buf[INET6_ADDRSTRLEN]; 7080Sstevel@tonic-gate nce_t *nce; 7090Sstevel@tonic-gate 7100Sstevel@tonic-gate /* 7110Sstevel@tonic-gate * All IRE_CACHE types must have a nce. If this is 7120Sstevel@tonic-gate * not the case the entry will not be added. We need 7130Sstevel@tonic-gate * to make sure that if somebody deletes the nce 7140Sstevel@tonic-gate * after we looked up, they will find this ire and 7150Sstevel@tonic-gate * delete the ire. To delete this ire one needs the 7160Sstevel@tonic-gate * bucket lock which we are still holding here. So, 7170Sstevel@tonic-gate * even if the nce gets deleted after we looked up, 7180Sstevel@tonic-gate * this ire will get deleted. 7190Sstevel@tonic-gate * 7200Sstevel@tonic-gate * NOTE : Don't need the ire_lock for accessing 7210Sstevel@tonic-gate * ire_gateway_addr_v6 as it is appearing first 7220Sstevel@tonic-gate * time on the list and rts_setgwr_v6 could not 7230Sstevel@tonic-gate * be changing this. 
7240Sstevel@tonic-gate */ 725*8485SPeter.Memishian@Sun.COM addr_v6 = &ire->ire_gateway_addr_v6; 726*8485SPeter.Memishian@Sun.COM if (IN6_IS_ADDR_UNSPECIFIED(addr_v6)) 727*8485SPeter.Memishian@Sun.COM addr_v6 = &ire->ire_addr_v6; 728*8485SPeter.Memishian@Sun.COM 729*8485SPeter.Memishian@Sun.COM /* nce fastpath is per-ill; don't match across illgrp */ 730*8485SPeter.Memishian@Sun.COM nce = ndp_lookup_v6(ill, B_FALSE, addr_v6, B_TRUE); 7310Sstevel@tonic-gate if (nce == NULL) 7320Sstevel@tonic-gate goto failed; 7330Sstevel@tonic-gate 7340Sstevel@tonic-gate /* Pair of refhold, refrele just to get the tracing right */ 7352535Ssangeeta NCE_REFHOLD_TO_REFHOLD_NOTR(nce); 7360Sstevel@tonic-gate /* 7370Sstevel@tonic-gate * Atomically make sure that new IREs don't point 7380Sstevel@tonic-gate * to an NCE that is logically deleted (CONDEMNED). 7390Sstevel@tonic-gate * ndp_delete() first marks the NCE CONDEMNED. 7400Sstevel@tonic-gate * This ensures that the nce_refcnt won't increase 7410Sstevel@tonic-gate * due to new nce_lookups or due to addition of new IREs 7420Sstevel@tonic-gate * pointing to this NCE. Then ndp_delete() cleans up 7430Sstevel@tonic-gate * existing references. If we don't do it atomically here, 7440Sstevel@tonic-gate * ndp_delete() -> nce_ire_delete() will not be able to 7450Sstevel@tonic-gate * clean up the IRE list completely, and the nce_refcnt 7460Sstevel@tonic-gate * won't go down to zero. 7470Sstevel@tonic-gate */ 7480Sstevel@tonic-gate mutex_enter(&nce->nce_lock); 7490Sstevel@tonic-gate if (ill->ill_flags & ILLF_XRESOLV) { 7500Sstevel@tonic-gate /* 7510Sstevel@tonic-gate * If we used an external resolver, we may not 7520Sstevel@tonic-gate * have gone through neighbor discovery to get here. 7530Sstevel@tonic-gate * Must update the nce_state before the next check. 
7540Sstevel@tonic-gate */ 7550Sstevel@tonic-gate if (nce->nce_state == ND_INCOMPLETE) 7560Sstevel@tonic-gate nce->nce_state = ND_REACHABLE; 7570Sstevel@tonic-gate } 7580Sstevel@tonic-gate if (nce->nce_state == ND_INCOMPLETE || 7590Sstevel@tonic-gate (nce->nce_flags & NCE_F_CONDEMNED) || 7600Sstevel@tonic-gate (nce->nce_state == ND_UNREACHABLE)) { 7610Sstevel@tonic-gate failed: 7620Sstevel@tonic-gate if (ndp_g_lock_held) 7633448Sdh155122 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 7640Sstevel@tonic-gate if (nce != NULL) 7650Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 7660Sstevel@tonic-gate ire_atomic_end(irb_ptr, ire); 7670Sstevel@tonic-gate ip1dbg(("ire_add_v6: No nce for dst %s \n", 7680Sstevel@tonic-gate inet_ntop(AF_INET6, &ire->ire_addr_v6, 7690Sstevel@tonic-gate buf, sizeof (buf)))); 7700Sstevel@tonic-gate ire_delete(ire); 7710Sstevel@tonic-gate if (pire != NULL) { 7720Sstevel@tonic-gate /* 7730Sstevel@tonic-gate * Assert that it is 7740Sstevel@tonic-gate * not yet removed from the list. 7750Sstevel@tonic-gate */ 7760Sstevel@tonic-gate ASSERT(pire->ire_ptpn != NULL); 7770Sstevel@tonic-gate IRB_REFRELE(pire->ire_bucket); 7780Sstevel@tonic-gate ire_refrele(pire); 7790Sstevel@tonic-gate } 7800Sstevel@tonic-gate if (nce != NULL) 7810Sstevel@tonic-gate NCE_REFRELE_NOTR(nce); 7820Sstevel@tonic-gate *ire_p = NULL; 7830Sstevel@tonic-gate return (EINVAL); 7840Sstevel@tonic-gate } else { 7850Sstevel@tonic-gate ire->ire_nce = nce; 7860Sstevel@tonic-gate } 7870Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 7880Sstevel@tonic-gate } 7890Sstevel@tonic-gate /* 7900Sstevel@tonic-gate * Find the first entry that matches ire_addr - provides 7910Sstevel@tonic-gate * tail insertion. *irep will be null if no match. 
7920Sstevel@tonic-gate */ 7930Sstevel@tonic-gate irep = (ire_t **)irb_ptr; 7940Sstevel@tonic-gate while ((ire1 = *irep) != NULL && 7950Sstevel@tonic-gate !IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &ire1->ire_addr_v6)) 7960Sstevel@tonic-gate irep = &ire1->ire_next; 7970Sstevel@tonic-gate ASSERT(!(ire->ire_type & IRE_BROADCAST)); 7980Sstevel@tonic-gate 7990Sstevel@tonic-gate if (*irep != NULL) { 8000Sstevel@tonic-gate /* 8010Sstevel@tonic-gate * Find the last ire which matches ire_addr_v6. 8020Sstevel@tonic-gate * Needed to do tail insertion among entries with the same 8030Sstevel@tonic-gate * ire_addr_v6. 8040Sstevel@tonic-gate */ 8050Sstevel@tonic-gate while (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 8060Sstevel@tonic-gate &ire1->ire_addr_v6)) { 8070Sstevel@tonic-gate irep = &ire1->ire_next; 8080Sstevel@tonic-gate ire1 = *irep; 8090Sstevel@tonic-gate if (ire1 == NULL) 8100Sstevel@tonic-gate break; 8110Sstevel@tonic-gate } 8120Sstevel@tonic-gate } 8130Sstevel@tonic-gate 8140Sstevel@tonic-gate if (ire->ire_type == IRE_DEFAULT) { 8150Sstevel@tonic-gate /* 8160Sstevel@tonic-gate * We keep a count of default gateways which is used when 8170Sstevel@tonic-gate * assigning them as routes. 8180Sstevel@tonic-gate */ 8193448Sdh155122 ipst->ips_ipv6_ire_default_count++; 8203448Sdh155122 ASSERT(ipst->ips_ipv6_ire_default_count != 0); /* Wraparound */ 8210Sstevel@tonic-gate } 8220Sstevel@tonic-gate /* Insert at *irep */ 8230Sstevel@tonic-gate ire1 = *irep; 8240Sstevel@tonic-gate if (ire1 != NULL) 8250Sstevel@tonic-gate ire1->ire_ptpn = &ire->ire_next; 8260Sstevel@tonic-gate ire->ire_next = ire1; 8270Sstevel@tonic-gate /* Link the new one in. */ 8280Sstevel@tonic-gate ire->ire_ptpn = irep; 8290Sstevel@tonic-gate /* 8300Sstevel@tonic-gate * ire_walk routines de-reference ire_next without holding 8310Sstevel@tonic-gate * a lock. 
Before we point to the new ire, we want to make 8320Sstevel@tonic-gate * sure the store that sets the ire_next of the new ire 8330Sstevel@tonic-gate * reaches global visibility, so that ire_walk routines 8340Sstevel@tonic-gate * don't see a truncated list of ires i.e if the ire_next 8350Sstevel@tonic-gate * of the new ire gets set after we do "*irep = ire" due 8360Sstevel@tonic-gate * to re-ordering, the ire_walk thread will see a NULL 8370Sstevel@tonic-gate * once it accesses the ire_next of the new ire. 8380Sstevel@tonic-gate * membar_producer() makes sure that the following store 8390Sstevel@tonic-gate * happens *after* all of the above stores. 8400Sstevel@tonic-gate */ 8410Sstevel@tonic-gate membar_producer(); 8420Sstevel@tonic-gate *irep = ire; 8430Sstevel@tonic-gate ire->ire_bucket = irb_ptr; 8440Sstevel@tonic-gate /* 8450Sstevel@tonic-gate * We return a bumped up IRE above. Keep it symmetrical 8460Sstevel@tonic-gate * so that the callers will always have to release. This 8470Sstevel@tonic-gate * helps the callers of this function because they continue 8480Sstevel@tonic-gate * to use the IRE after adding and hence they don't have to 8490Sstevel@tonic-gate * lookup again after we return the IRE. 8500Sstevel@tonic-gate * 8510Sstevel@tonic-gate * NOTE : We don't have to use atomics as this is appearing 8520Sstevel@tonic-gate * in the list for the first time and no one else can bump 8530Sstevel@tonic-gate * up the reference count on this yet. 
8540Sstevel@tonic-gate */ 8550Sstevel@tonic-gate IRE_REFHOLD_LOCKED(ire); 8563448Sdh155122 BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_inserted); 8570Sstevel@tonic-gate irb_ptr->irb_ire_cnt++; 8580Sstevel@tonic-gate if (ire->ire_marks & IRE_MARK_TEMPORARY) 8590Sstevel@tonic-gate irb_ptr->irb_tmp_ire_cnt++; 8600Sstevel@tonic-gate 8610Sstevel@tonic-gate if (ire->ire_ipif != NULL) { 8626255Ssowmini DTRACE_PROBE3(ipif__incr__cnt, (ipif_t *), ire->ire_ipif, 8636255Ssowmini (char *), "ire", (void *), ire); 8646379Ssowmini ire->ire_ipif->ipif_ire_cnt++; 8650Sstevel@tonic-gate if (ire->ire_stq != NULL) { 8660Sstevel@tonic-gate stq_ill = (ill_t *)ire->ire_stq->q_ptr; 8676255Ssowmini DTRACE_PROBE3(ill__incr__cnt, (ill_t *), stq_ill, 8686255Ssowmini (char *), "ire", (void *), ire); 8696379Ssowmini stq_ill->ill_ire_cnt++; 8700Sstevel@tonic-gate } 8710Sstevel@tonic-gate } else { 8720Sstevel@tonic-gate ASSERT(ire->ire_stq == NULL); 8730Sstevel@tonic-gate } 8740Sstevel@tonic-gate 8750Sstevel@tonic-gate if (ndp_g_lock_held) 8763448Sdh155122 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 8770Sstevel@tonic-gate ire_atomic_end(irb_ptr, ire); 8780Sstevel@tonic-gate 8790Sstevel@tonic-gate if (pire != NULL) { 8800Sstevel@tonic-gate /* Assert that it is not removed from the list yet */ 8810Sstevel@tonic-gate ASSERT(pire->ire_ptpn != NULL); 8820Sstevel@tonic-gate IRB_REFRELE(pire->ire_bucket); 8830Sstevel@tonic-gate ire_refrele(pire); 8840Sstevel@tonic-gate } 8850Sstevel@tonic-gate 8860Sstevel@tonic-gate if (ire->ire_type != IRE_CACHE) { 8870Sstevel@tonic-gate /* 8880Sstevel@tonic-gate * For ire's with with host mask see if there is an entry 8890Sstevel@tonic-gate * in the cache. If there is one flush the whole cache as 8900Sstevel@tonic-gate * there might be multiple entries due to RTF_MULTIRT (CGTP). 8910Sstevel@tonic-gate * If no entry is found than there is no need to flush the 8920Sstevel@tonic-gate * cache. 
8930Sstevel@tonic-gate */ 8940Sstevel@tonic-gate 8950Sstevel@tonic-gate if (ip_mask_to_plen_v6(&ire->ire_mask_v6) == IPV6_ABITS) { 8960Sstevel@tonic-gate ire_t *lire; 8970Sstevel@tonic-gate lire = ire_ctable_lookup_v6(&ire->ire_addr_v6, NULL, 8983448Sdh155122 IRE_CACHE, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, 8993448Sdh155122 ipst); 9000Sstevel@tonic-gate if (lire != NULL) { 9010Sstevel@tonic-gate ire_refrele(lire); 9020Sstevel@tonic-gate ire_flush_cache_v6(ire, IRE_FLUSH_ADD); 9030Sstevel@tonic-gate } 9040Sstevel@tonic-gate } else { 9050Sstevel@tonic-gate ire_flush_cache_v6(ire, IRE_FLUSH_ADD); 9060Sstevel@tonic-gate } 9070Sstevel@tonic-gate } 9080Sstevel@tonic-gate 9090Sstevel@tonic-gate *ire_p = ire; 9100Sstevel@tonic-gate return (0); 9110Sstevel@tonic-gate } 9120Sstevel@tonic-gate 9130Sstevel@tonic-gate /* 9140Sstevel@tonic-gate * Search for all HOST REDIRECT routes that are 9150Sstevel@tonic-gate * pointing at the specified gateway and 9160Sstevel@tonic-gate * delete them. This routine is called only 9170Sstevel@tonic-gate * when a default gateway is going away. 
9180Sstevel@tonic-gate */ 9190Sstevel@tonic-gate static void 9203448Sdh155122 ire_delete_host_redirects_v6(const in6_addr_t *gateway, ip_stack_t *ipst) 9210Sstevel@tonic-gate { 9220Sstevel@tonic-gate irb_t *irb_ptr; 9230Sstevel@tonic-gate irb_t *irb; 9240Sstevel@tonic-gate ire_t *ire; 9250Sstevel@tonic-gate in6_addr_t gw_addr_v6; 9260Sstevel@tonic-gate int i; 9270Sstevel@tonic-gate 9280Sstevel@tonic-gate /* get the hash table for HOST routes */ 9293448Sdh155122 irb_ptr = ipst->ips_ip_forwarding_table_v6[(IP6_MASK_TABLE_SIZE - 1)]; 9300Sstevel@tonic-gate if (irb_ptr == NULL) 9310Sstevel@tonic-gate return; 9323448Sdh155122 for (i = 0; (i < ipst->ips_ip6_ftable_hash_size); i++) { 9330Sstevel@tonic-gate irb = &irb_ptr[i]; 9340Sstevel@tonic-gate IRB_REFHOLD(irb); 9350Sstevel@tonic-gate for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 9363004Sdd193516 if (!(ire->ire_flags & RTF_DYNAMIC)) 9370Sstevel@tonic-gate continue; 9380Sstevel@tonic-gate mutex_enter(&ire->ire_lock); 9390Sstevel@tonic-gate gw_addr_v6 = ire->ire_gateway_addr_v6; 9400Sstevel@tonic-gate mutex_exit(&ire->ire_lock); 9410Sstevel@tonic-gate if (IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) 9420Sstevel@tonic-gate ire_delete(ire); 9430Sstevel@tonic-gate } 9440Sstevel@tonic-gate IRB_REFRELE(irb); 9450Sstevel@tonic-gate } 9460Sstevel@tonic-gate } 9470Sstevel@tonic-gate 9480Sstevel@tonic-gate /* 9490Sstevel@tonic-gate * Delete all the cache entries with this 'addr'. This is the IPv6 counterpart 9500Sstevel@tonic-gate * of ip_ire_clookup_and_delete. The difference being this function does not 9510Sstevel@tonic-gate * return any value. IPv6 processing of a gratuitous ARP, as it stands, is 9520Sstevel@tonic-gate * different than IPv4 in that, regardless of the presence of a cache entry 9530Sstevel@tonic-gate * for this address, an ire_walk_v6 is done. 
Another difference is that unlike 9540Sstevel@tonic-gate * in the case of IPv4 this does not take an ipif_t argument, since it is only 9550Sstevel@tonic-gate * called by ip_arp_news and the match is always only on the address. 9560Sstevel@tonic-gate */ 9570Sstevel@tonic-gate void 9583448Sdh155122 ip_ire_clookup_and_delete_v6(const in6_addr_t *addr, ip_stack_t *ipst) 9590Sstevel@tonic-gate { 9600Sstevel@tonic-gate irb_t *irb; 9610Sstevel@tonic-gate ire_t *cire; 9620Sstevel@tonic-gate boolean_t found = B_FALSE; 9630Sstevel@tonic-gate 9643448Sdh155122 irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 9654714Ssowmini ipst->ips_ip6_cache_table_size)]; 9660Sstevel@tonic-gate IRB_REFHOLD(irb); 9670Sstevel@tonic-gate for (cire = irb->irb_ire; cire != NULL; cire = cire->ire_next) { 9683448Sdh155122 if (cire->ire_marks & IRE_MARK_CONDEMNED) 9690Sstevel@tonic-gate continue; 9700Sstevel@tonic-gate if (IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, addr)) { 9710Sstevel@tonic-gate 9720Sstevel@tonic-gate /* This signifies start of a match */ 9730Sstevel@tonic-gate if (!found) 9740Sstevel@tonic-gate found = B_TRUE; 9750Sstevel@tonic-gate if (cire->ire_type == IRE_CACHE) { 9760Sstevel@tonic-gate if (cire->ire_nce != NULL) 9770Sstevel@tonic-gate ndp_delete(cire->ire_nce); 9780Sstevel@tonic-gate ire_delete_v6(cire); 9790Sstevel@tonic-gate } 9800Sstevel@tonic-gate /* End of the match */ 9810Sstevel@tonic-gate } else if (found) 9820Sstevel@tonic-gate break; 9830Sstevel@tonic-gate } 9840Sstevel@tonic-gate IRB_REFRELE(irb); 9850Sstevel@tonic-gate } 9860Sstevel@tonic-gate 9870Sstevel@tonic-gate /* 9880Sstevel@tonic-gate * Delete the specified IRE. 9890Sstevel@tonic-gate * All calls should use ire_delete(). 9900Sstevel@tonic-gate * Sometimes called as writer though not required by this function. 9910Sstevel@tonic-gate * 9920Sstevel@tonic-gate * NOTE : This function is called only if the ire was added 9930Sstevel@tonic-gate * in the list. 
9940Sstevel@tonic-gate */ 9950Sstevel@tonic-gate void 9960Sstevel@tonic-gate ire_delete_v6(ire_t *ire) 9970Sstevel@tonic-gate { 9980Sstevel@tonic-gate in6_addr_t gw_addr_v6; 9993448Sdh155122 ip_stack_t *ipst = ire->ire_ipst; 10000Sstevel@tonic-gate 10010Sstevel@tonic-gate ASSERT(ire->ire_refcnt >= 1); 10020Sstevel@tonic-gate ASSERT(ire->ire_ipversion == IPV6_VERSION); 10030Sstevel@tonic-gate 10040Sstevel@tonic-gate if (ire->ire_type != IRE_CACHE) 10050Sstevel@tonic-gate ire_flush_cache_v6(ire, IRE_FLUSH_DELETE); 10060Sstevel@tonic-gate if (ire->ire_type == IRE_DEFAULT) { 10070Sstevel@tonic-gate /* 10080Sstevel@tonic-gate * when a default gateway is going away 10090Sstevel@tonic-gate * delete all the host redirects pointing at that 10100Sstevel@tonic-gate * gateway. 10110Sstevel@tonic-gate */ 10120Sstevel@tonic-gate mutex_enter(&ire->ire_lock); 10130Sstevel@tonic-gate gw_addr_v6 = ire->ire_gateway_addr_v6; 10140Sstevel@tonic-gate mutex_exit(&ire->ire_lock); 10153448Sdh155122 ire_delete_host_redirects_v6(&gw_addr_v6, ipst); 10160Sstevel@tonic-gate } 10170Sstevel@tonic-gate } 10180Sstevel@tonic-gate 10190Sstevel@tonic-gate /* 10203004Sdd193516 * ire_walk routine to delete all IRE_CACHE and IRE_HOST type redirect 10210Sstevel@tonic-gate * entries. 
10220Sstevel@tonic-gate */ 10230Sstevel@tonic-gate /*ARGSUSED1*/ 10240Sstevel@tonic-gate void 10250Sstevel@tonic-gate ire_delete_cache_v6(ire_t *ire, char *arg) 10260Sstevel@tonic-gate { 10270Sstevel@tonic-gate char addrstr1[INET6_ADDRSTRLEN]; 10280Sstevel@tonic-gate char addrstr2[INET6_ADDRSTRLEN]; 10290Sstevel@tonic-gate 10303004Sdd193516 if ((ire->ire_type & IRE_CACHE) || 10313004Sdd193516 (ire->ire_flags & RTF_DYNAMIC)) { 10320Sstevel@tonic-gate ip1dbg(("ire_delete_cache_v6: deleted %s type %d through %s\n", 10330Sstevel@tonic-gate inet_ntop(AF_INET6, &ire->ire_addr_v6, 10344714Ssowmini addrstr1, sizeof (addrstr1)), 10350Sstevel@tonic-gate ire->ire_type, 10360Sstevel@tonic-gate inet_ntop(AF_INET6, &ire->ire_gateway_addr_v6, 10374714Ssowmini addrstr2, sizeof (addrstr2)))); 10380Sstevel@tonic-gate ire_delete(ire); 10390Sstevel@tonic-gate } 10400Sstevel@tonic-gate 10410Sstevel@tonic-gate } 10420Sstevel@tonic-gate 10430Sstevel@tonic-gate /* 10443004Sdd193516 * ire_walk routine to delete all IRE_CACHE/IRE_HOST type redirect entries 10450Sstevel@tonic-gate * that have a given gateway address. 
10460Sstevel@tonic-gate */ 10470Sstevel@tonic-gate void 10480Sstevel@tonic-gate ire_delete_cache_gw_v6(ire_t *ire, char *addr) 10490Sstevel@tonic-gate { 10500Sstevel@tonic-gate in6_addr_t *gw_addr = (in6_addr_t *)addr; 10510Sstevel@tonic-gate char buf1[INET6_ADDRSTRLEN]; 10520Sstevel@tonic-gate char buf2[INET6_ADDRSTRLEN]; 10530Sstevel@tonic-gate in6_addr_t ire_gw_addr_v6; 10540Sstevel@tonic-gate 10553004Sdd193516 if (!(ire->ire_type & IRE_CACHE) && 10563004Sdd193516 !(ire->ire_flags & RTF_DYNAMIC)) 10570Sstevel@tonic-gate return; 10580Sstevel@tonic-gate 10590Sstevel@tonic-gate mutex_enter(&ire->ire_lock); 10600Sstevel@tonic-gate ire_gw_addr_v6 = ire->ire_gateway_addr_v6; 10610Sstevel@tonic-gate mutex_exit(&ire->ire_lock); 10620Sstevel@tonic-gate 10630Sstevel@tonic-gate if (IN6_ARE_ADDR_EQUAL(&ire_gw_addr_v6, gw_addr)) { 10640Sstevel@tonic-gate ip1dbg(("ire_delete_cache_gw_v6: deleted %s type %d to %s\n", 10650Sstevel@tonic-gate inet_ntop(AF_INET6, &ire->ire_src_addr_v6, 10660Sstevel@tonic-gate buf1, sizeof (buf1)), 10670Sstevel@tonic-gate ire->ire_type, 10680Sstevel@tonic-gate inet_ntop(AF_INET6, &ire_gw_addr_v6, 10690Sstevel@tonic-gate buf2, sizeof (buf2)))); 10700Sstevel@tonic-gate ire_delete(ire); 10710Sstevel@tonic-gate } 10720Sstevel@tonic-gate } 10730Sstevel@tonic-gate 10740Sstevel@tonic-gate /* 10750Sstevel@tonic-gate * Remove all IRE_CACHE entries that match 10760Sstevel@tonic-gate * the ire specified. (Sometimes called 10770Sstevel@tonic-gate * as writer though not required by this function.) 10780Sstevel@tonic-gate * 10790Sstevel@tonic-gate * The flag argument indicates if the 10800Sstevel@tonic-gate * flush request is due to addition 10810Sstevel@tonic-gate * of new route (IRE_FLUSH_ADD) or deletion of old 10820Sstevel@tonic-gate * route (IRE_FLUSH_DELETE). 
10830Sstevel@tonic-gate * 10840Sstevel@tonic-gate * This routine takes only the IREs from the forwarding 10850Sstevel@tonic-gate * table and flushes the corresponding entries from 10860Sstevel@tonic-gate * the cache table. 10870Sstevel@tonic-gate * 10880Sstevel@tonic-gate * When flushing due to the deletion of an old route, it 10890Sstevel@tonic-gate * just checks the cache handles (ire_phandle and ire_ihandle) and 10900Sstevel@tonic-gate * deletes the ones that match. 10910Sstevel@tonic-gate * 10920Sstevel@tonic-gate * When flushing due to the creation of a new route, it checks 10930Sstevel@tonic-gate * if a cache entry's address matches the one in the IRE and 10940Sstevel@tonic-gate * that the cache entry's parent has a less specific mask than the 10950Sstevel@tonic-gate * one in IRE. The destination of such a cache entry could be the 10960Sstevel@tonic-gate * gateway for other cache entries, so we need to flush those as 10970Sstevel@tonic-gate * well by looking for gateway addresses matching the IRE's address. 10980Sstevel@tonic-gate */ 10990Sstevel@tonic-gate void 11000Sstevel@tonic-gate ire_flush_cache_v6(ire_t *ire, int flag) 11010Sstevel@tonic-gate { 11020Sstevel@tonic-gate int i; 11030Sstevel@tonic-gate ire_t *cire; 11040Sstevel@tonic-gate irb_t *irb; 11053448Sdh155122 ip_stack_t *ipst = ire->ire_ipst; 11060Sstevel@tonic-gate 11070Sstevel@tonic-gate if (ire->ire_type & IRE_CACHE) 11084714Ssowmini return; 11090Sstevel@tonic-gate 11100Sstevel@tonic-gate /* 11110Sstevel@tonic-gate * If a default is just created, there is no point 11120Sstevel@tonic-gate * in going through the cache, as there will not be any 11130Sstevel@tonic-gate * cached ires. 
11140Sstevel@tonic-gate */ 11150Sstevel@tonic-gate if (ire->ire_type == IRE_DEFAULT && flag == IRE_FLUSH_ADD) 11160Sstevel@tonic-gate return; 11170Sstevel@tonic-gate if (flag == IRE_FLUSH_ADD) { 11180Sstevel@tonic-gate /* 11190Sstevel@tonic-gate * This selective flush is 11200Sstevel@tonic-gate * due to the addition of 11210Sstevel@tonic-gate * new IRE. 11220Sstevel@tonic-gate */ 11233448Sdh155122 for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { 11243448Sdh155122 irb = &ipst->ips_ip_cache_table_v6[i]; 11250Sstevel@tonic-gate if ((cire = irb->irb_ire) == NULL) 11260Sstevel@tonic-gate continue; 11270Sstevel@tonic-gate IRB_REFHOLD(irb); 11280Sstevel@tonic-gate for (cire = irb->irb_ire; cire != NULL; 11290Sstevel@tonic-gate cire = cire->ire_next) { 11300Sstevel@tonic-gate if (cire->ire_type != IRE_CACHE) 11310Sstevel@tonic-gate continue; 11320Sstevel@tonic-gate /* 11330Sstevel@tonic-gate * If 'cire' belongs to the same subnet 11340Sstevel@tonic-gate * as the new ire being added, and 'cire' 11350Sstevel@tonic-gate * is derived from a prefix that is less 11360Sstevel@tonic-gate * specific than the new ire being added, 11370Sstevel@tonic-gate * we need to flush 'cire'; for instance, 11380Sstevel@tonic-gate * when a new interface comes up. 11390Sstevel@tonic-gate */ 11400Sstevel@tonic-gate if ((V6_MASK_EQ_2(cire->ire_addr_v6, 11410Sstevel@tonic-gate ire->ire_mask_v6, ire->ire_addr_v6) && 11420Sstevel@tonic-gate (ip_mask_to_plen_v6(&cire->ire_cmask_v6) <= 11430Sstevel@tonic-gate ire->ire_masklen))) { 11440Sstevel@tonic-gate ire_delete(cire); 11450Sstevel@tonic-gate continue; 11460Sstevel@tonic-gate } 11470Sstevel@tonic-gate /* 11480Sstevel@tonic-gate * This is the case when the ire_gateway_addr 11490Sstevel@tonic-gate * of 'cire' belongs to the same subnet as 11500Sstevel@tonic-gate * the new ire being added. 
11510Sstevel@tonic-gate * Flushing such ires is sometimes required to 11520Sstevel@tonic-gate * avoid misrouting: say we have a machine with 11530Sstevel@tonic-gate * two interfaces (I1 and I2), a default router 11540Sstevel@tonic-gate * R on the I1 subnet, and a host route to an 11550Sstevel@tonic-gate * off-link destination D with a gateway G on 11560Sstevel@tonic-gate * the I2 subnet. 11570Sstevel@tonic-gate * Under normal operation, we will have an 11580Sstevel@tonic-gate * on-link cache entry for G and an off-link 11590Sstevel@tonic-gate * cache entry for D with G as ire_gateway_addr, 11600Sstevel@tonic-gate * traffic to D will reach its destination 11610Sstevel@tonic-gate * through gateway G. 11620Sstevel@tonic-gate * If the administrator does 'ifconfig I2 down', 11630Sstevel@tonic-gate * the cache entries for D and G will be 11640Sstevel@tonic-gate * flushed. However, G will now be resolved as 11650Sstevel@tonic-gate * an off-link destination using R (the default 11660Sstevel@tonic-gate * router) as gateway. Then D will also be 11670Sstevel@tonic-gate * resolved as an off-link destination using G 11680Sstevel@tonic-gate * as gateway - this behavior is due to 11690Sstevel@tonic-gate * compatibility reasons, see comment in 11700Sstevel@tonic-gate * ire_ihandle_lookup_offlink(). Traffic to D 11710Sstevel@tonic-gate * will go to the router R and probably won't 11720Sstevel@tonic-gate * reach the destination. 11730Sstevel@tonic-gate * The administrator then does 'ifconfig I2 up'. 11740Sstevel@tonic-gate * Since G is on the I2 subnet, this routine 11750Sstevel@tonic-gate * will flush its cache entry. It must also 11760Sstevel@tonic-gate * flush the cache entry for D, otherwise 11770Sstevel@tonic-gate * traffic will stay misrouted until the IRE 11780Sstevel@tonic-gate * times out. 
11790Sstevel@tonic-gate */ 11800Sstevel@tonic-gate if (V6_MASK_EQ_2(cire->ire_gateway_addr_v6, 11810Sstevel@tonic-gate ire->ire_mask_v6, ire->ire_addr_v6)) { 11820Sstevel@tonic-gate ire_delete(cire); 11830Sstevel@tonic-gate continue; 11840Sstevel@tonic-gate } 11850Sstevel@tonic-gate } 11860Sstevel@tonic-gate IRB_REFRELE(irb); 11870Sstevel@tonic-gate } 11880Sstevel@tonic-gate } else { 11890Sstevel@tonic-gate /* 11900Sstevel@tonic-gate * delete the cache entries based on 11910Sstevel@tonic-gate * handle in the IRE as this IRE is 11920Sstevel@tonic-gate * being deleted/changed. 11930Sstevel@tonic-gate */ 11943448Sdh155122 for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { 11953448Sdh155122 irb = &ipst->ips_ip_cache_table_v6[i]; 11960Sstevel@tonic-gate if ((cire = irb->irb_ire) == NULL) 11970Sstevel@tonic-gate continue; 11980Sstevel@tonic-gate IRB_REFHOLD(irb); 11990Sstevel@tonic-gate for (cire = irb->irb_ire; cire != NULL; 12000Sstevel@tonic-gate cire = cire->ire_next) { 12010Sstevel@tonic-gate if (cire->ire_type != IRE_CACHE) 12020Sstevel@tonic-gate continue; 12030Sstevel@tonic-gate if ((cire->ire_phandle == 0 || 12040Sstevel@tonic-gate cire->ire_phandle != ire->ire_phandle) && 12050Sstevel@tonic-gate (cire->ire_ihandle == 0 || 12060Sstevel@tonic-gate cire->ire_ihandle != ire->ire_ihandle)) 12070Sstevel@tonic-gate continue; 12080Sstevel@tonic-gate ire_delete(cire); 12090Sstevel@tonic-gate } 12100Sstevel@tonic-gate IRB_REFRELE(irb); 12110Sstevel@tonic-gate } 12120Sstevel@tonic-gate } 12130Sstevel@tonic-gate } 12140Sstevel@tonic-gate 12150Sstevel@tonic-gate /* 12160Sstevel@tonic-gate * Matches the arguments passed with the values in the ire. 12170Sstevel@tonic-gate * 12180Sstevel@tonic-gate * Note: for match types that match using "ipif" passed in, ipif 12190Sstevel@tonic-gate * must be checked for non-NULL before calling this routine. 
12200Sstevel@tonic-gate */ 12210Sstevel@tonic-gate static boolean_t 12220Sstevel@tonic-gate ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, const in6_addr_t *mask, 12231676Sjpk const in6_addr_t *gateway, int type, const ipif_t *ipif, zoneid_t zoneid, 12241676Sjpk uint32_t ihandle, const ts_label_t *tsl, int match_flags) 12250Sstevel@tonic-gate { 12260Sstevel@tonic-gate in6_addr_t masked_addr; 12270Sstevel@tonic-gate in6_addr_t gw_addr_v6; 12280Sstevel@tonic-gate ill_t *ire_ill = NULL, *dst_ill; 12290Sstevel@tonic-gate ill_t *ipif_ill = NULL; 12300Sstevel@tonic-gate ipif_t *src_ipif; 12310Sstevel@tonic-gate 12320Sstevel@tonic-gate ASSERT(ire->ire_ipversion == IPV6_VERSION); 12330Sstevel@tonic-gate ASSERT(addr != NULL); 12340Sstevel@tonic-gate ASSERT(mask != NULL); 12350Sstevel@tonic-gate ASSERT((!(match_flags & MATCH_IRE_GW)) || gateway != NULL); 1236*8485SPeter.Memishian@Sun.COM ASSERT((!(match_flags & MATCH_IRE_ILL)) || 12370Sstevel@tonic-gate (ipif != NULL && ipif->ipif_isv6)); 12380Sstevel@tonic-gate 12390Sstevel@tonic-gate /* 1240*8485SPeter.Memishian@Sun.COM * If MATCH_IRE_MARK_TESTHIDDEN is set, then only return the IRE if it 1241*8485SPeter.Memishian@Sun.COM * is in fact hidden, to ensure the caller gets the right one. One 1242*8485SPeter.Memishian@Sun.COM * exception: if the caller passed MATCH_IRE_IHANDLE, then they 1243*8485SPeter.Memishian@Sun.COM * already know the identity of the given IRE_INTERFACE entry and 1244*8485SPeter.Memishian@Sun.COM * there's no point trying to hide it from them. 
12450Sstevel@tonic-gate */ 1246*8485SPeter.Memishian@Sun.COM if (ire->ire_marks & IRE_MARK_TESTHIDDEN) { 1247*8485SPeter.Memishian@Sun.COM if (match_flags & MATCH_IRE_IHANDLE) 1248*8485SPeter.Memishian@Sun.COM match_flags |= MATCH_IRE_MARK_TESTHIDDEN; 1249*8485SPeter.Memishian@Sun.COM 1250*8485SPeter.Memishian@Sun.COM if (!(match_flags & MATCH_IRE_MARK_TESTHIDDEN)) 1251*8485SPeter.Memishian@Sun.COM return (B_FALSE); 1252*8485SPeter.Memishian@Sun.COM } 12530Sstevel@tonic-gate 12541676Sjpk if (zoneid != ALL_ZONES && zoneid != ire->ire_zoneid && 12551676Sjpk ire->ire_zoneid != ALL_ZONES) { 12560Sstevel@tonic-gate /* 12570Sstevel@tonic-gate * If MATCH_IRE_ZONEONLY has been set and the supplied zoneid is 12580Sstevel@tonic-gate * valid and does not match that of ire_zoneid, a failure to 12590Sstevel@tonic-gate * match is reported at this point. Otherwise, since some IREs 12600Sstevel@tonic-gate * that are available in the global zone can be used in local 12610Sstevel@tonic-gate * zones, additional checks need to be performed: 12620Sstevel@tonic-gate * 12630Sstevel@tonic-gate * IRE_CACHE and IRE_LOOPBACK entries should 12640Sstevel@tonic-gate * never be matched in this situation. 12650Sstevel@tonic-gate * 12660Sstevel@tonic-gate * IRE entries that have an interface associated with them 12670Sstevel@tonic-gate * should in general not match unless they are an IRE_LOCAL 12680Sstevel@tonic-gate * or in the case when MATCH_IRE_DEFAULT has been set in 12690Sstevel@tonic-gate * the caller. In the case of the former, checking of the 12700Sstevel@tonic-gate * other fields supplied should take place. 12710Sstevel@tonic-gate * 12720Sstevel@tonic-gate * In the case where MATCH_IRE_DEFAULT has been set, 12730Sstevel@tonic-gate * all of the ipif's associated with the IRE's ill are 12740Sstevel@tonic-gate * checked to see if there is a matching zoneid. 
If any 12750Sstevel@tonic-gate * one ipif has a matching zoneid, this IRE is a 12760Sstevel@tonic-gate * potential candidate so checking of the other fields 12770Sstevel@tonic-gate * takes place. 12780Sstevel@tonic-gate * 12790Sstevel@tonic-gate * In the case where the IRE_INTERFACE has a usable source 12800Sstevel@tonic-gate * address (indicated by ill_usesrc_ifindex) in the 12810Sstevel@tonic-gate * correct zone then it's permitted to return this IRE 12820Sstevel@tonic-gate */ 12830Sstevel@tonic-gate if (match_flags & MATCH_IRE_ZONEONLY) 12840Sstevel@tonic-gate return (B_FALSE); 12850Sstevel@tonic-gate if (ire->ire_type & (IRE_CACHE | IRE_LOOPBACK)) 12860Sstevel@tonic-gate return (B_FALSE); 12870Sstevel@tonic-gate /* 12880Sstevel@tonic-gate * Note, IRE_INTERFACE can have the stq as NULL. For 12890Sstevel@tonic-gate * example, if the default multicast route is tied to 12900Sstevel@tonic-gate * the loopback address. 12910Sstevel@tonic-gate */ 12920Sstevel@tonic-gate if ((ire->ire_type & IRE_INTERFACE) && 12930Sstevel@tonic-gate (ire->ire_stq != NULL)) { 12940Sstevel@tonic-gate dst_ill = (ill_t *)ire->ire_stq->q_ptr; 12950Sstevel@tonic-gate /* 12960Sstevel@tonic-gate * If there is a usable source address in the 12970Sstevel@tonic-gate * zone, then it's ok to return an 12980Sstevel@tonic-gate * IRE_INTERFACE 12990Sstevel@tonic-gate */ 13000Sstevel@tonic-gate if ((dst_ill->ill_usesrc_ifindex != 0) && 13010Sstevel@tonic-gate (src_ipif = ipif_select_source_v6(dst_ill, addr, 1302*8485SPeter.Memishian@Sun.COM B_FALSE, IPV6_PREFER_SRC_DEFAULT, zoneid)) 13030Sstevel@tonic-gate != NULL) { 13040Sstevel@tonic-gate ip3dbg(("ire_match_args: src_ipif %p" 13050Sstevel@tonic-gate " dst_ill %p", (void *)src_ipif, 13060Sstevel@tonic-gate (void *)dst_ill)); 13070Sstevel@tonic-gate ipif_refrele(src_ipif); 13080Sstevel@tonic-gate } else { 13090Sstevel@tonic-gate ip3dbg(("ire_match_args: src_ipif NULL" 13100Sstevel@tonic-gate " dst_ill %p\n", (void *)dst_ill)); 13110Sstevel@tonic-gate 
return (B_FALSE); 13120Sstevel@tonic-gate } 13130Sstevel@tonic-gate } 13140Sstevel@tonic-gate if (ire->ire_ipif != NULL && ire->ire_type != IRE_LOCAL && 13150Sstevel@tonic-gate !(ire->ire_type & IRE_INTERFACE)) { 13160Sstevel@tonic-gate ipif_t *tipif; 13170Sstevel@tonic-gate 13180Sstevel@tonic-gate if ((match_flags & MATCH_IRE_DEFAULT) == 0) 13190Sstevel@tonic-gate return (B_FALSE); 13200Sstevel@tonic-gate mutex_enter(&ire->ire_ipif->ipif_ill->ill_lock); 13210Sstevel@tonic-gate for (tipif = ire->ire_ipif->ipif_ill->ill_ipif; 13220Sstevel@tonic-gate tipif != NULL; tipif = tipif->ipif_next) { 13230Sstevel@tonic-gate if (IPIF_CAN_LOOKUP(tipif) && 13240Sstevel@tonic-gate (tipif->ipif_flags & IPIF_UP) && 13251676Sjpk (tipif->ipif_zoneid == zoneid || 13261676Sjpk tipif->ipif_zoneid == ALL_ZONES)) 13270Sstevel@tonic-gate break; 13280Sstevel@tonic-gate } 13290Sstevel@tonic-gate mutex_exit(&ire->ire_ipif->ipif_ill->ill_lock); 13300Sstevel@tonic-gate if (tipif == NULL) 13310Sstevel@tonic-gate return (B_FALSE); 13320Sstevel@tonic-gate } 13330Sstevel@tonic-gate } 13340Sstevel@tonic-gate 13350Sstevel@tonic-gate if (match_flags & MATCH_IRE_GW) { 13360Sstevel@tonic-gate mutex_enter(&ire->ire_lock); 13370Sstevel@tonic-gate gw_addr_v6 = ire->ire_gateway_addr_v6; 13380Sstevel@tonic-gate mutex_exit(&ire->ire_lock); 13390Sstevel@tonic-gate } 1340*8485SPeter.Memishian@Sun.COM 13410Sstevel@tonic-gate /* 1342*8485SPeter.Memishian@Sun.COM * For IRE_CACHE entries, MATCH_IRE_ILL means that somebody wants to 1343*8485SPeter.Memishian@Sun.COM * send out ire_stq (ire_ipif for IRE_CACHE entries is just the means 1344*8485SPeter.Memishian@Sun.COM * of getting a source address -- i.e., ire_src_addr_v6 == 1345*8485SPeter.Memishian@Sun.COM * ire->ire_ipif->ipif_v6src_addr). ire_to_ill() handles this. 1346*8485SPeter.Memishian@Sun.COM * 1347*8485SPeter.Memishian@Sun.COM * NOTE: For IPMP, MATCH_IRE_ILL usually matches any ill in the group. 
1348*8485SPeter.Memishian@Sun.COM * However, if MATCH_IRE_MARK_TESTHIDDEN is set (i.e., the IRE is for 1349*8485SPeter.Memishian@Sun.COM * IPMP test traffic), then the ill must match exactly. 13500Sstevel@tonic-gate */ 1351*8485SPeter.Memishian@Sun.COM if (match_flags & MATCH_IRE_ILL) { 13520Sstevel@tonic-gate ire_ill = ire_to_ill(ire); 13530Sstevel@tonic-gate ipif_ill = ipif->ipif_ill; 13540Sstevel@tonic-gate } 13550Sstevel@tonic-gate 13560Sstevel@tonic-gate /* No ire_addr_v6 bits set past the mask */ 13570Sstevel@tonic-gate ASSERT(V6_MASK_EQ(ire->ire_addr_v6, ire->ire_mask_v6, 13580Sstevel@tonic-gate ire->ire_addr_v6)); 13590Sstevel@tonic-gate V6_MASK_COPY(*addr, *mask, masked_addr); 13600Sstevel@tonic-gate 13610Sstevel@tonic-gate if (V6_MASK_EQ(*addr, *mask, ire->ire_addr_v6) && 13620Sstevel@tonic-gate ((!(match_flags & MATCH_IRE_GW)) || 13634714Ssowmini IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) && 13640Sstevel@tonic-gate ((!(match_flags & MATCH_IRE_TYPE)) || 13654714Ssowmini (ire->ire_type & type)) && 13660Sstevel@tonic-gate ((!(match_flags & MATCH_IRE_SRC)) || 13674714Ssowmini IN6_ARE_ADDR_EQUAL(&ire->ire_src_addr_v6, 13684714Ssowmini &ipif->ipif_v6src_addr)) && 13690Sstevel@tonic-gate ((!(match_flags & MATCH_IRE_IPIF)) || 13704714Ssowmini (ire->ire_ipif == ipif)) && 1371*8485SPeter.Memishian@Sun.COM ((!(match_flags & MATCH_IRE_MARK_TESTHIDDEN)) || 1372*8485SPeter.Memishian@Sun.COM (ire->ire_marks & IRE_MARK_TESTHIDDEN)) && 13730Sstevel@tonic-gate ((!(match_flags & MATCH_IRE_ILL)) || 1374*8485SPeter.Memishian@Sun.COM (ire_ill == ipif_ill || 1375*8485SPeter.Memishian@Sun.COM (!(match_flags & MATCH_IRE_MARK_TESTHIDDEN) && 1376*8485SPeter.Memishian@Sun.COM ire_ill != NULL && IS_IN_SAME_ILLGRP(ipif_ill, ire_ill)))) && 13770Sstevel@tonic-gate ((!(match_flags & MATCH_IRE_IHANDLE)) || 13784714Ssowmini (ire->ire_ihandle == ihandle)) && 13791676Sjpk ((!(match_flags & MATCH_IRE_SECATTR)) || 13804714Ssowmini (!is_system_labeled()) || 13814714Ssowmini 
(tsol_ire_match_gwattr(ire, tsl) == 0))) { 13820Sstevel@tonic-gate /* We found the matched IRE */ 13830Sstevel@tonic-gate return (B_TRUE); 13840Sstevel@tonic-gate } 13850Sstevel@tonic-gate return (B_FALSE); 13860Sstevel@tonic-gate } 13870Sstevel@tonic-gate 13880Sstevel@tonic-gate /* 13890Sstevel@tonic-gate * Lookup for a route in all the tables 13900Sstevel@tonic-gate */ 13910Sstevel@tonic-gate ire_t * 13920Sstevel@tonic-gate ire_route_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, 13931676Sjpk const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire, 13943448Sdh155122 zoneid_t zoneid, const ts_label_t *tsl, int flags, ip_stack_t *ipst) 13950Sstevel@tonic-gate { 13960Sstevel@tonic-gate ire_t *ire = NULL; 13970Sstevel@tonic-gate 13980Sstevel@tonic-gate /* 13990Sstevel@tonic-gate * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or 14000Sstevel@tonic-gate * MATCH_IRE_ILL is set. 14010Sstevel@tonic-gate */ 1402*8485SPeter.Memishian@Sun.COM if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL)) && (ipif == NULL)) 14030Sstevel@tonic-gate return (NULL); 14040Sstevel@tonic-gate 14050Sstevel@tonic-gate /* 14060Sstevel@tonic-gate * might be asking for a cache lookup, 14070Sstevel@tonic-gate * This is not best way to lookup cache, 14080Sstevel@tonic-gate * user should call ire_cache_lookup directly. 14090Sstevel@tonic-gate * 14100Sstevel@tonic-gate * If MATCH_IRE_TYPE was set, first lookup in the cache table and then 14110Sstevel@tonic-gate * in the forwarding table, if the applicable type flags were set. 
14120Sstevel@tonic-gate */ 14130Sstevel@tonic-gate if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_CACHETABLE) != 0) { 14140Sstevel@tonic-gate ire = ire_ctable_lookup_v6(addr, gateway, type, ipif, zoneid, 14153448Sdh155122 tsl, flags, ipst); 14160Sstevel@tonic-gate if (ire != NULL) 14170Sstevel@tonic-gate return (ire); 14180Sstevel@tonic-gate } 14190Sstevel@tonic-gate if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_FORWARDTABLE) != 0) { 14200Sstevel@tonic-gate ire = ire_ftable_lookup_v6(addr, mask, gateway, type, ipif, 14213448Sdh155122 pire, zoneid, 0, tsl, flags, ipst); 14220Sstevel@tonic-gate } 14230Sstevel@tonic-gate return (ire); 14240Sstevel@tonic-gate } 14250Sstevel@tonic-gate 14260Sstevel@tonic-gate /* 14270Sstevel@tonic-gate * Lookup a route in forwarding table. 14280Sstevel@tonic-gate * specific lookup is indicated by passing the 14290Sstevel@tonic-gate * required parameters and indicating the 14300Sstevel@tonic-gate * match required in flag field. 14310Sstevel@tonic-gate * 14320Sstevel@tonic-gate * Looking for default route can be done in three ways 14330Sstevel@tonic-gate * 1) pass mask as ipv6_all_zeros and set MATCH_IRE_MASK in flags field 14340Sstevel@tonic-gate * along with other matches. 14350Sstevel@tonic-gate * 2) pass type as IRE_DEFAULT and set MATCH_IRE_TYPE in flags 14360Sstevel@tonic-gate * field along with other matches. 14370Sstevel@tonic-gate * 3) if the destination and mask are passed as zeros. 14380Sstevel@tonic-gate * 14390Sstevel@tonic-gate * A request to return a default route if no route 14400Sstevel@tonic-gate * is found, can be specified by setting MATCH_IRE_DEFAULT 14410Sstevel@tonic-gate * in flags. 14420Sstevel@tonic-gate * 14430Sstevel@tonic-gate * It does not support recursion more than one level. It 14440Sstevel@tonic-gate * will do recursive lookup only when the lookup maps to 14450Sstevel@tonic-gate * a prefix or default route and MATCH_IRE_RECURSIVE flag is passed. 
*
 * If the routing table is setup to allow more than one level
 * of recursion, the cleaning up cache table will not work resulting
 * in invalid routing.
 *
 * Supports link-local addresses by following the ipif/ill when recursing.
 *
 * NOTE : When this function returns NULL, pire has already been released.
 *	  pire is valid only when this function successfully returns an
 *	  ire.
 *
 * Summary of the contract as implemented below:
 *  - addr must be non-NULL; mask must be non-NULL if MATCH_IRE_MASK is set;
 *    gateway must be non-NULL if MATCH_IRE_GW is set (all ASSERTed).
 *  - ipif must be non-NULL if MATCH_IRE_SRC or MATCH_IRE_ILL is set,
 *    otherwise NULL is returned without searching.
 *  - If pire is non-NULL, *pire is cleared on entry and is only set (to the
 *    parent ire) on the MATCH_IRE_RECURSIVE path.
 *  - A non-NULL return value has had IRE_REFHOLD applied to it.
 */
ire_t *
ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask,
    const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire,
    zoneid_t zoneid, uint32_t ihandle, const ts_label_t *tsl, int flags,
    ip_stack_t *ipst)
{
	irb_t *irb_ptr;
	ire_t	*rire;
	ire_t *ire = NULL;
	ire_t	*saved_ire;
	nce_t	*nce;
	int i;
	in6_addr_t gw_addr_v6;

	ASSERT(addr != NULL);
	ASSERT((!(flags & MATCH_IRE_MASK)) || mask != NULL);
	ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL);
	ASSERT(ipif == NULL || ipif->ipif_isv6);

	/*
	 * When we return NULL from this function, we should make
	 * sure that *pire is NULL so that the callers will not
	 * wrongly REFRELE the pire.
	 */
	if (pire != NULL)
		*pire = NULL;
	/*
	 * ire_match_args_v6() will dereference ipif if MATCH_IRE_SRC or
	 * MATCH_IRE_ILL is set.
	 */
	if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL)) && (ipif == NULL))
		return (NULL);

	/*
	 * If the mask is known, the lookup
	 * is simple, if the mask is not known
	 * we need to search.
	 */
	if (flags & MATCH_IRE_MASK) {
		uint_t masklen;

		/* Only the single bucket selected by (addr, mask) is walked. */
		masklen = ip_mask_to_plen_v6(mask);
		if (ipst->ips_ip_forwarding_table_v6[masklen] == NULL)
			return (NULL);
		irb_ptr = &(ipst->ips_ip_forwarding_table_v6[masklen][
		    IRE_ADDR_MASK_HASH_V6(*addr, *mask,
		    ipst->ips_ip6_ftable_hash_size)]);
		rw_enter(&irb_ptr->irb_lock, RW_READER);
		for (ire = irb_ptr->irb_ire; ire != NULL;
		    ire = ire->ire_next) {
			if (ire->ire_marks & IRE_MARK_CONDEMNED)
				continue;
			/* found_ire expects irb_lock to still be held. */
			if (ire_match_args_v6(ire, addr, mask, gateway, type,
			    ipif, zoneid, ihandle, tsl, flags))
				goto found_ire;
		}
		rw_exit(&irb_ptr->irb_lock);
	} else {
		/*
		 * In this case we don't know the mask, we need to
		 * search the table assuming different mask sizes.
		 * we start with 128 bit mask, we don't allow default here.
		 * (The loop stops before index 0, the zero-length prefix.)
		 */
		for (i = (IP6_MASK_TABLE_SIZE - 1); i > 0; i--) {
			in6_addr_t tmpmask;

			if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL)
				continue;
			(void) ip_plen_to_mask_v6(i, &tmpmask);
			irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][
			    IRE_ADDR_MASK_HASH_V6(*addr, tmpmask,
			    ipst->ips_ip6_ftable_hash_size)];
			rw_enter(&irb_ptr->irb_lock, RW_READER);
			for (ire = irb_ptr->irb_ire; ire != NULL;
			    ire = ire->ire_next) {
				if (ire->ire_marks & IRE_MARK_CONDEMNED)
					continue;
				/* Each candidate is matched with its own mask. */
				if (ire_match_args_v6(ire, addr,
				    &ire->ire_mask_v6, gateway, type, ipif,
				    zoneid, ihandle, tsl, flags))
					goto found_ire;
			}
			rw_exit(&irb_ptr->irb_lock);
		}
	}

	/*
	 * We come here if no route has yet been found.
	 *
	 * Handle the case where default route is
	 * requested by specifying type as one of the possible
	 * types that can have a zero mask (IRE_DEFAULT and IRE_INTERFACE).
	 *
	 * If MATCH_IRE_MASK is specified, then the appropriate default route
	 * would have been found above if it exists so it isn't looked up here.
	 * If MATCH_IRE_DEFAULT was also specified, then a default route will be
	 * searched for later.
	 */
	if ((flags & (MATCH_IRE_TYPE | MATCH_IRE_MASK)) == MATCH_IRE_TYPE &&
	    (type & (IRE_DEFAULT | IRE_INTERFACE))) {
		if (ipst->ips_ip_forwarding_table_v6[0] != NULL) {
			/* addr & mask is zero for defaults */
			irb_ptr = &ipst->ips_ip_forwarding_table_v6[0][
			    IRE_ADDR_HASH_V6(ipv6_all_zeros,
			    ipst->ips_ip6_ftable_hash_size)];
			rw_enter(&irb_ptr->irb_lock, RW_READER);
			for (ire = irb_ptr->irb_ire; ire != NULL;
			    ire = ire->ire_next) {

				if (ire->ire_marks & IRE_MARK_CONDEMNED)
					continue;

				if (ire_match_args_v6(ire, addr,
				    &ipv6_all_zeros, gateway, type, ipif,
				    zoneid, ihandle, tsl, flags))
					goto found_ire;
			}
			rw_exit(&irb_ptr->irb_lock);
		}
	}
	/*
	 * We come here only if no route is found.
	 * see if the default route can be used which is allowed
	 * only if the default matching criteria is specified.
	 * The ipv6_ire_default_count tracks the number of IRE_DEFAULT
	 * entries. However, the ip_forwarding_table_v6[0] also contains
	 * interface routes thus the count can be zero.
	 */
	saved_ire = NULL;
	if ((flags & (MATCH_IRE_DEFAULT | MATCH_IRE_MASK)) ==
	    MATCH_IRE_DEFAULT) {
		ire_t	*ire_origin;
		uint_t	g_index;
		uint_t	index;

		if (ipst->ips_ip_forwarding_table_v6[0] == NULL)
			return (NULL);
		irb_ptr = &(ipst->ips_ip_forwarding_table_v6[0])[0];

		/*
		 * Keep a tab on the bucket while looking at the IRE_DEFAULT
		 * entries. We need to keep track of a particular IRE
		 * (ire_origin) so this ensures that it will not be unlinked
		 * from the hash list during the recursive lookup below.
		 */
		IRB_REFHOLD(irb_ptr);
		ire = irb_ptr->irb_ire;
		if (ire == NULL) {
			IRB_REFRELE(irb_ptr);
			return (NULL);
		}

		/*
		 * Get the index first, since it can be changed by other
		 * threads. Then get to the right default route skipping
		 * default interface routes if any. As we hold a reference on
		 * the IRE bucket, ipv6_ire_default_count can only increase so
		 * we can't reach the end of the hash list unexpectedly.
		 */
		if (ipst->ips_ipv6_ire_default_count != 0) {
			g_index = ipst->ips_ipv6_ire_default_index++;
			index = g_index % ipst->ips_ipv6_ire_default_count;
			/* Only non-IRE_INTERFACE entries count towards index. */
			while (index != 0) {
				if (!(ire->ire_type & IRE_INTERFACE))
					index--;
				ire = ire->ire_next;
			}
			ASSERT(ire != NULL);
		} else {
			/*
			 * No default route, so we only have default interface
			 * routes: don't enter the first loop.
			 */
			ire = NULL;
		}

		/*
		 * Round-robin the default routers list looking for a neighbor
		 * that matches the passed in parameters and is reachable.  If
		 * none found, just return a route from the default router list
		 * if it exists. If we can't find a default route (IRE_DEFAULT),
		 * look for interface default routes.
		 * We start with the ire we found above and we walk the hash
		 * list until we're back where we started, see
		 * ire_get_next_default_ire(). It doesn't matter if default
		 * routes are added or deleted by other threads - we know this
		 * ire will stay in the list because we hold a reference on the
		 * ire bucket.
		 * NB: if we only have interface default routes, ire is NULL so
		 * we don't even enter this loop (see above).
		 */
		ire_origin = ire;
		for (; ire != NULL;
		    ire = ire_get_next_default_ire(ire, ire_origin)) {

			if (ire_match_args_v6(ire, addr,
			    &ipv6_all_zeros, gateway, type, ipif,
			    zoneid, ihandle, tsl, flags)) {
				int match_flags;

				/*
				 * We have something to work with.
				 * If we can find a resolved/reachable
				 * entry, we will use this. Otherwise
				 * we'll try to find an entry that has
				 * a resolved cache entry. We will fallback
				 * on this if we don't find anything else.
				 */
				if (saved_ire == NULL)
					saved_ire = ire;
				/* Snapshot the gateway under ire_lock. */
				mutex_enter(&ire->ire_lock);
				gw_addr_v6 = ire->ire_gateway_addr_v6;
				mutex_exit(&ire->ire_lock);
				/*
				 * NOTE(review): MATCH_IRE_ILL is set here
				 * unconditionally, whereas the
				 * ire_route_lookup_v6() call further down only
				 * sets it when ire->ire_ipif != NULL.  Confirm
				 * that ire_ipif cannot be NULL for the entries
				 * reaching this point, or that the cache
				 * lookup tolerates a NULL ipif.
				 */
				match_flags = MATCH_IRE_ILL | MATCH_IRE_SECATTR;
				rire = ire_ctable_lookup_v6(&gw_addr_v6, NULL,
				    0, ire->ire_ipif, zoneid, tsl, match_flags,
				    ipst);
				if (rire != NULL) {
					nce = rire->ire_nce;
					if (nce != NULL &&
					    NCE_ISREACHABLE(nce) &&
					    nce->nce_flags & NCE_F_ISROUTER) {
						/*
						 * Reachable router: take it.
						 * Hold the ire before the
						 * bucket is released.
						 */
						ire_refrele(rire);
						IRE_REFHOLD(ire);
						IRB_REFRELE(irb_ptr);
						goto found_ire_held;
					} else if (nce != NULL &&
					    !(nce->nce_flags &
					    NCE_F_ISROUTER)) {
						/*
						 * Make sure we don't use
						 * this ire
						 */
						if (saved_ire == ire)
							saved_ire = NULL;
					}
					ire_refrele(rire);
				} else if (ipst->
				    ips_ipv6_ire_default_count > 1 &&
				    zoneid != GLOBAL_ZONEID) {
					/*
					 * When we're in a local zone, we're
					 * only interested in default routers
					 * that are reachable through ipifs
					 * within our zone.
					 * The potentially expensive call to
					 * ire_route_lookup_v6() is avoided when
					 * we have only one default route.
					 */
					int ire_match_flags = MATCH_IRE_TYPE |
					    MATCH_IRE_SECATTR;

					if (ire->ire_ipif != NULL) {
						ire_match_flags |=
						    MATCH_IRE_ILL;
					}
					rire = ire_route_lookup_v6(&gw_addr_v6,
					    NULL, NULL, IRE_INTERFACE,
					    ire->ire_ipif, NULL,
					    zoneid, tsl, ire_match_flags, ipst);
					if (rire != NULL) {
						ire_refrele(rire);
						saved_ire = ire;
					} else if (saved_ire == ire) {
						/*
						 * Make sure we don't use
						 * this ire
						 */
						saved_ire = NULL;
					}
				}
			}
		}
		if (saved_ire != NULL) {
			/* Fall back on the candidate remembered above. */
			ire = saved_ire;
			IRE_REFHOLD(ire);
			IRB_REFRELE(irb_ptr);
			goto found_ire_held;
		} else {
			/*
			 * Look for an interface default route matching the
			 * args passed in. No round robin here. Just pick
			 * the right one.
			 */
			for (ire = irb_ptr->irb_ire; ire != NULL;
			    ire = ire->ire_next) {

				if (!(ire->ire_type & IRE_INTERFACE))
					continue;

				if (ire->ire_marks & IRE_MARK_CONDEMNED)
					continue;

				if (ire_match_args_v6(ire, addr,
				    &ipv6_all_zeros, gateway, type, ipif,
				    zoneid, ihandle, tsl, flags)) {
					IRE_REFHOLD(ire);
					IRB_REFRELE(irb_ptr);
					goto found_ire_held;
				}
			}
			IRB_REFRELE(irb_ptr);
		}
	}
	ASSERT(ire == NULL);
	ip1dbg(("ire_ftable_lookup_v6: returning NULL ire"));
	return (NULL);
found_ire:
	/*
	 * Reached from the bucket walks above with irb_lock still held as
	 * reader: take the reference first, then drop the lock.
	 */
	ASSERT((ire->ire_marks & IRE_MARK_CONDEMNED) == 0);
	IRE_REFHOLD(ire);
	rw_exit(&irb_ptr->irb_lock);

found_ire_held:
	/* From here on 'ire' carries a reference that we own. */
	if ((flags & MATCH_IRE_RJ_BHOLE) &&
	    (ire->ire_flags & (RTF_BLACKHOLE | RTF_REJECT))) {
		return (ire);
	}
	/*
	 * At this point, IRE that was found must be an IRE_FORWARDTABLE
	 * or IRE_CACHETABLE type.  If this is a recursive lookup and an
	 * IRE_INTERFACE type was found, return that.  If it was some other
	 * IRE_FORWARDTABLE type of IRE (one of the prefix types), then it
	 * is necessary to fill in the parent IRE pointed to by pire, and
	 * then lookup the gateway address of the parent.  For backwards
	 * compatibility, if this lookup returns an
	 * IRE other than a IRE_CACHETABLE or IRE_INTERFACE, then one more level
	 * of lookup is done.
	 */
	if (flags & MATCH_IRE_RECURSIVE) {
		const ipif_t *gw_ipif;
		int match_flags = MATCH_IRE_DSTONLY;

		if (ire->ire_type & IRE_INTERFACE)
			return (ire);
		if (pire != NULL)
			*pire = ire;
		/*
		 * If we can't find an IRE_INTERFACE or the caller has not
		 * asked for pire, we need to REFRELE the saved_ire.
		 */
		saved_ire = ire;

		if (ire->ire_ipif != NULL)
			match_flags |= MATCH_IRE_ILL;

		mutex_enter(&ire->ire_lock);
		gw_addr_v6 = ire->ire_gateway_addr_v6;
		mutex_exit(&ire->ire_lock);

		/* First level: resolve the parent's gateway address. */
		ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, 0,
		    ire->ire_ipif, NULL, zoneid, tsl, match_flags, ipst);
		if (ire == NULL) {
			/*
			 * In this case we have to deal with the
			 * MATCH_IRE_PARENT flag, which means the
			 * parent has to be returned if ire is NULL.
			 * The aim of this is to have (at least) a starting
			 * ire when we want to look at all of the ires in a
			 * bucket aimed at a single destination (as is the
			 * case in ip_newroute_v6 for the RTF_MULTIRT
			 * flagged routes).
			 */
			if (flags & MATCH_IRE_PARENT) {
				if (pire != NULL) {
					/*
					 * Need an extra REFHOLD, if the
					 * parent ire is returned via both
					 * ire and pire.
					 */
					IRE_REFHOLD(saved_ire);
				}
				ire = saved_ire;
			} else {
				ire_refrele(saved_ire);
				if (pire != NULL)
					*pire = NULL;
			}
			return (ire);
		}
		if (ire->ire_type & (IRE_CACHETABLE | IRE_INTERFACE)) {
			/*
			 * If the caller did not ask for pire, release
			 * it now.
			 */
			if (pire == NULL) {
				ire_refrele(saved_ire);
			}
			return (ire);
		}
		/*
		 * Second (and last) level: the intermediate ire is released
		 * once its gateway and ipif have been copied out, and the
		 * lookup is restricted to IRE_CACHETABLE | IRE_INTERFACE.
		 */
		match_flags |= MATCH_IRE_TYPE;
		mutex_enter(&ire->ire_lock);
		gw_addr_v6 = ire->ire_gateway_addr_v6;
		mutex_exit(&ire->ire_lock);
		gw_ipif = ire->ire_ipif;
		ire_refrele(ire);
		ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL,
		    (IRE_CACHETABLE | IRE_INTERFACE), gw_ipif, NULL, zoneid,
		    NULL, match_flags, ipst);
		if (ire == NULL) {
			/*
			 * In this case we have to deal with the
			 * MATCH_IRE_PARENT flag, which means the
			 * parent has to be returned if ire is NULL.
			 * The aim of this is to have (at least) a starting
			 * ire when we want to look at all of the ires in a
			 * bucket aimed at a single destination (as is the
			 * case in ip_newroute_v6 for the RTF_MULTIRT
			 * flagged routes).
			 */
			if (flags & MATCH_IRE_PARENT) {
				if (pire != NULL) {
					/*
					 * Need an extra REFHOLD, if the
					 * parent ire is returned via both
					 * ire and pire.
					 */
					IRE_REFHOLD(saved_ire);
				}
				ire = saved_ire;
			} else {
				ire_refrele(saved_ire);
				if (pire != NULL)
					*pire = NULL;
			}
			return (ire);
		} else if (pire == NULL) {
			/*
			 * If the caller did not ask for pire, release
			 * it now.
			 */
			ire_refrele(saved_ire);
		}
		return (ire);
	}

	ASSERT(pire == NULL || *pire == NULL);
	return (ire);
}

/*
 * Delete the IRE cache for the gateway and all IRE caches whose
 * ire_gateway_addr_v6 points to this gateway, and allow them to
 * be created on demand by ip_newroute_v6.
19001676Sjpk */ 19011676Sjpk void 19023448Sdh155122 ire_clookup_delete_cache_gw_v6(const in6_addr_t *addr, zoneid_t zoneid, 19033448Sdh155122 ip_stack_t *ipst) 19041676Sjpk { 19051676Sjpk irb_t *irb; 19061676Sjpk ire_t *ire; 19071676Sjpk 19083448Sdh155122 irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 19093448Sdh155122 ipst->ips_ip6_cache_table_size)]; 19101676Sjpk IRB_REFHOLD(irb); 19111676Sjpk for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 19121676Sjpk if (ire->ire_marks & IRE_MARK_CONDEMNED) 19131676Sjpk continue; 19141676Sjpk 19151676Sjpk ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones)); 19161676Sjpk if (ire_match_args_v6(ire, addr, &ire->ire_mask_v6, 0, 19171676Sjpk IRE_CACHE, NULL, zoneid, 0, NULL, MATCH_IRE_TYPE)) { 19181676Sjpk ire_delete(ire); 19191676Sjpk } 19201676Sjpk } 19211676Sjpk IRB_REFRELE(irb); 19221676Sjpk 19233448Sdh155122 ire_walk_v6(ire_delete_cache_gw_v6, (char *)addr, zoneid, ipst); 19241676Sjpk } 19251676Sjpk 19261676Sjpk /* 19270Sstevel@tonic-gate * Looks up cache table for a route. 19280Sstevel@tonic-gate * specific lookup can be indicated by 19290Sstevel@tonic-gate * passing the MATCH_* flags and the 19300Sstevel@tonic-gate * necessary parameters. 
19310Sstevel@tonic-gate */ 19320Sstevel@tonic-gate ire_t * 19330Sstevel@tonic-gate ire_ctable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *gateway, 19341676Sjpk int type, const ipif_t *ipif, zoneid_t zoneid, const ts_label_t *tsl, 19353448Sdh155122 int flags, ip_stack_t *ipst) 19360Sstevel@tonic-gate { 19377880SJonathan.Anderson@Sun.COM ire_ctable_args_t margs; 19380Sstevel@tonic-gate 19397880SJonathan.Anderson@Sun.COM margs.ict_addr = (void *)addr; 19407880SJonathan.Anderson@Sun.COM margs.ict_gateway = (void *)gateway; 19417880SJonathan.Anderson@Sun.COM margs.ict_type = type; 19427880SJonathan.Anderson@Sun.COM margs.ict_ipif = ipif; 19437880SJonathan.Anderson@Sun.COM margs.ict_zoneid = zoneid; 19447880SJonathan.Anderson@Sun.COM margs.ict_tsl = tsl; 19457880SJonathan.Anderson@Sun.COM margs.ict_flags = flags; 19467880SJonathan.Anderson@Sun.COM margs.ict_ipst = ipst; 19477880SJonathan.Anderson@Sun.COM margs.ict_wq = NULL; 19480Sstevel@tonic-gate 19497880SJonathan.Anderson@Sun.COM return (ip6_ctable_lookup_impl(&margs)); 19500Sstevel@tonic-gate } 19510Sstevel@tonic-gate 19520Sstevel@tonic-gate /* 1953*8485SPeter.Memishian@Sun.COM * Lookup cache. 19542733Snordmark * 19552733Snordmark * In general the zoneid has to match (where ALL_ZONES match all of them). 19562733Snordmark * But for IRE_LOCAL we also need to handle the case where L2 should 19572733Snordmark * conceptually loop back the packet. This is necessary since neither 19582733Snordmark * Ethernet drivers nor Ethernet hardware loops back packets sent to their 19592733Snordmark * own MAC address. This loopback is needed when the normal 19602733Snordmark * routes (ignoring IREs with different zoneids) would send out the packet on 1961*8485SPeter.Memishian@Sun.COM * the same ill as the ill with which this IRE_LOCAL is associated. 19622733Snordmark * 19632733Snordmark * Earlier versions of this code always matched an IRE_LOCAL independently of 19642733Snordmark * the zoneid. 
We preserve that earlier behavior when 19652733Snordmark * ip_restrict_interzone_loopback is turned off. 19660Sstevel@tonic-gate */ 19670Sstevel@tonic-gate ire_t * 19681676Sjpk ire_cache_lookup_v6(const in6_addr_t *addr, zoneid_t zoneid, 19693448Sdh155122 const ts_label_t *tsl, ip_stack_t *ipst) 19700Sstevel@tonic-gate { 19710Sstevel@tonic-gate irb_t *irb_ptr; 19720Sstevel@tonic-gate ire_t *ire; 19730Sstevel@tonic-gate 19743448Sdh155122 irb_ptr = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 19753448Sdh155122 ipst->ips_ip6_cache_table_size)]; 19760Sstevel@tonic-gate rw_enter(&irb_ptr->irb_lock, RW_READER); 19770Sstevel@tonic-gate for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) { 1978*8485SPeter.Memishian@Sun.COM if (ire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_TESTHIDDEN)) 19790Sstevel@tonic-gate continue; 19800Sstevel@tonic-gate if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, addr)) { 19811676Sjpk /* 19821676Sjpk * Finally, check if the security policy has any 19831676Sjpk * restriction on using this route for the specified 19841676Sjpk * message. 
19851676Sjpk */ 19861676Sjpk if (tsl != NULL && 19871676Sjpk ire->ire_gw_secattr != NULL && 19881676Sjpk tsol_ire_match_gwattr(ire, tsl) != 0) { 19891676Sjpk continue; 19901676Sjpk } 19911676Sjpk 19920Sstevel@tonic-gate if (zoneid == ALL_ZONES || ire->ire_zoneid == zoneid || 19932733Snordmark ire->ire_zoneid == ALL_ZONES) { 19942733Snordmark IRE_REFHOLD(ire); 19952733Snordmark rw_exit(&irb_ptr->irb_lock); 19962733Snordmark return (ire); 19972733Snordmark } 19982733Snordmark 19992733Snordmark if (ire->ire_type == IRE_LOCAL) { 20003448Sdh155122 if (ipst->ips_ip_restrict_interzone_loopback && 20012733Snordmark !ire_local_ok_across_zones(ire, zoneid, 20023448Sdh155122 (void *)addr, tsl, ipst)) 20032733Snordmark continue; 20042733Snordmark 20050Sstevel@tonic-gate IRE_REFHOLD(ire); 20060Sstevel@tonic-gate rw_exit(&irb_ptr->irb_lock); 20070Sstevel@tonic-gate return (ire); 20080Sstevel@tonic-gate } 20090Sstevel@tonic-gate } 20100Sstevel@tonic-gate } 20110Sstevel@tonic-gate rw_exit(&irb_ptr->irb_lock); 20120Sstevel@tonic-gate return (NULL); 20130Sstevel@tonic-gate } 20140Sstevel@tonic-gate 20150Sstevel@tonic-gate /* 20160Sstevel@tonic-gate * Locate the interface ire that is tied to the cache ire 'cire' via 20170Sstevel@tonic-gate * cire->ire_ihandle. 20180Sstevel@tonic-gate * 20190Sstevel@tonic-gate * We are trying to create the cache ire for an onlink destn. or 20200Sstevel@tonic-gate * gateway in 'cire'. We are called from ire_add_v6() in the IRE_IF_RESOLVER 20210Sstevel@tonic-gate * case for xresolv interfaces, after the ire has come back from 20220Sstevel@tonic-gate * an external resolver. 
20230Sstevel@tonic-gate */ 20240Sstevel@tonic-gate static ire_t * 20250Sstevel@tonic-gate ire_ihandle_lookup_onlink_v6(ire_t *cire) 20260Sstevel@tonic-gate { 20270Sstevel@tonic-gate ire_t *ire; 20280Sstevel@tonic-gate int match_flags; 20290Sstevel@tonic-gate int i; 20300Sstevel@tonic-gate int j; 20310Sstevel@tonic-gate irb_t *irb_ptr; 20323448Sdh155122 ip_stack_t *ipst = cire->ire_ipst; 20330Sstevel@tonic-gate 20340Sstevel@tonic-gate ASSERT(cire != NULL); 20350Sstevel@tonic-gate 20360Sstevel@tonic-gate match_flags = MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK; 20370Sstevel@tonic-gate /* 20380Sstevel@tonic-gate * We know that the mask of the interface ire equals cire->ire_cmask. 20390Sstevel@tonic-gate * (When ip_newroute_v6() created 'cire' for an on-link destn. 20400Sstevel@tonic-gate * it set its cmask from the interface ire's mask) 20410Sstevel@tonic-gate */ 20420Sstevel@tonic-gate ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 20430Sstevel@tonic-gate NULL, IRE_INTERFACE, NULL, NULL, ALL_ZONES, cire->ire_ihandle, 20443448Sdh155122 NULL, match_flags, ipst); 20450Sstevel@tonic-gate if (ire != NULL) 20460Sstevel@tonic-gate return (ire); 20470Sstevel@tonic-gate /* 20480Sstevel@tonic-gate * If we didn't find an interface ire above, we can't declare failure. 20490Sstevel@tonic-gate * For backwards compatibility, we need to support prefix routes 20500Sstevel@tonic-gate * pointing to next hop gateways that are not on-link. 20510Sstevel@tonic-gate * 20520Sstevel@tonic-gate * In the resolver/noresolver case, ip_newroute_v6() thinks 20530Sstevel@tonic-gate * it is creating the cache ire for an onlink destination in 'cire'. 20540Sstevel@tonic-gate * But 'cire' is not actually onlink, because ire_ftable_lookup_v6() 20550Sstevel@tonic-gate * cheated it, by doing ire_route_lookup_v6() twice and returning an 20560Sstevel@tonic-gate * interface ire. 20570Sstevel@tonic-gate * 20580Sstevel@tonic-gate * Eg. 
default - gw1 (line 1) 20590Sstevel@tonic-gate * gw1 - gw2 (line 2) 20600Sstevel@tonic-gate * gw2 - hme0 (line 3) 20610Sstevel@tonic-gate * 20620Sstevel@tonic-gate * In the above example, ip_newroute_v6() tried to create the cache ire 20630Sstevel@tonic-gate * 'cire' for gw1, based on the interface route in line 3. The 20640Sstevel@tonic-gate * ire_ftable_lookup_v6() above fails, because there is 20650Sstevel@tonic-gate * no interface route to reach gw1. (it is gw2). We fall thru below. 20660Sstevel@tonic-gate * 20670Sstevel@tonic-gate * Do a brute force search based on the ihandle in a subset of the 20680Sstevel@tonic-gate * forwarding tables, corresponding to cire->ire_cmask_v6. Otherwise 20690Sstevel@tonic-gate * things become very complex, since we don't have 'pire' in this 20700Sstevel@tonic-gate * case. (Also note that this method is not possible in the offlink 20710Sstevel@tonic-gate * case because we don't know the mask) 20720Sstevel@tonic-gate */ 20730Sstevel@tonic-gate i = ip_mask_to_plen_v6(&cire->ire_cmask_v6); 20743448Sdh155122 if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL) 20750Sstevel@tonic-gate return (NULL); 20763448Sdh155122 for (j = 0; j < ipst->ips_ip6_ftable_hash_size; j++) { 20773448Sdh155122 irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][j]; 20780Sstevel@tonic-gate rw_enter(&irb_ptr->irb_lock, RW_READER); 20790Sstevel@tonic-gate for (ire = irb_ptr->irb_ire; ire != NULL; 20800Sstevel@tonic-gate ire = ire->ire_next) { 20810Sstevel@tonic-gate if (ire->ire_marks & IRE_MARK_CONDEMNED) 20820Sstevel@tonic-gate continue; 20830Sstevel@tonic-gate if ((ire->ire_type & IRE_INTERFACE) && 20840Sstevel@tonic-gate (ire->ire_ihandle == cire->ire_ihandle)) { 20850Sstevel@tonic-gate IRE_REFHOLD(ire); 20860Sstevel@tonic-gate rw_exit(&irb_ptr->irb_lock); 20870Sstevel@tonic-gate return (ire); 20880Sstevel@tonic-gate } 20890Sstevel@tonic-gate } 20900Sstevel@tonic-gate rw_exit(&irb_ptr->irb_lock); 20910Sstevel@tonic-gate } 20920Sstevel@tonic-gate return (NULL); 
20930Sstevel@tonic-gate } 20940Sstevel@tonic-gate 20950Sstevel@tonic-gate 20960Sstevel@tonic-gate /* 20970Sstevel@tonic-gate * Locate the interface ire that is tied to the cache ire 'cire' via 20980Sstevel@tonic-gate * cire->ire_ihandle. 20990Sstevel@tonic-gate * 21000Sstevel@tonic-gate * We are trying to create the cache ire for an offlink destn based 21010Sstevel@tonic-gate * on the cache ire of the gateway in 'cire'. 'pire' is the prefix ire 21020Sstevel@tonic-gate * as found by ip_newroute_v6(). We are called from ip_newroute_v6() in 21030Sstevel@tonic-gate * the IRE_CACHE case. 21040Sstevel@tonic-gate */ 21050Sstevel@tonic-gate ire_t * 21060Sstevel@tonic-gate ire_ihandle_lookup_offlink_v6(ire_t *cire, ire_t *pire) 21070Sstevel@tonic-gate { 21080Sstevel@tonic-gate ire_t *ire; 21090Sstevel@tonic-gate int match_flags; 21100Sstevel@tonic-gate in6_addr_t gw_addr; 21110Sstevel@tonic-gate ipif_t *gw_ipif; 21123448Sdh155122 ip_stack_t *ipst = cire->ire_ipst; 21130Sstevel@tonic-gate 21140Sstevel@tonic-gate ASSERT(cire != NULL && pire != NULL); 21150Sstevel@tonic-gate 21160Sstevel@tonic-gate match_flags = MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK; 21170Sstevel@tonic-gate if (pire->ire_ipif != NULL) 2118*8485SPeter.Memishian@Sun.COM match_flags |= MATCH_IRE_ILL; 21190Sstevel@tonic-gate /* 21200Sstevel@tonic-gate * We know that the mask of the interface ire equals cire->ire_cmask. 21210Sstevel@tonic-gate * (When ip_newroute_v6() created 'cire' for an on-link destn. 
it set 21220Sstevel@tonic-gate * its cmask from the interface ire's mask) 21230Sstevel@tonic-gate */ 21240Sstevel@tonic-gate ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 0, 21250Sstevel@tonic-gate IRE_INTERFACE, pire->ire_ipif, NULL, ALL_ZONES, cire->ire_ihandle, 21263448Sdh155122 NULL, match_flags, ipst); 21270Sstevel@tonic-gate if (ire != NULL) 21280Sstevel@tonic-gate return (ire); 21290Sstevel@tonic-gate /* 21300Sstevel@tonic-gate * If we didn't find an interface ire above, we can't declare failure. 21310Sstevel@tonic-gate * For backwards compatibility, we need to support prefix routes 21320Sstevel@tonic-gate * pointing to next hop gateways that are not on-link. 21330Sstevel@tonic-gate * 21340Sstevel@tonic-gate * Assume we are trying to ping some offlink destn, and we have the 21350Sstevel@tonic-gate * routing table below. 21360Sstevel@tonic-gate * 21370Sstevel@tonic-gate * Eg. default - gw1 <--- pire (line 1) 21380Sstevel@tonic-gate * gw1 - gw2 (line 2) 21390Sstevel@tonic-gate * gw2 - hme0 (line 3) 21400Sstevel@tonic-gate * 21410Sstevel@tonic-gate * If we already have a cache ire for gw1 in 'cire', the 21420Sstevel@tonic-gate * ire_ftable_lookup_v6 above would have failed, since there is no 21430Sstevel@tonic-gate * interface ire to reach gw1. We will fallthru below. 21440Sstevel@tonic-gate * 21450Sstevel@tonic-gate * Here we duplicate the steps that ire_ftable_lookup_v6() did in 21460Sstevel@tonic-gate * getting 'cire' from 'pire', in the MATCH_IRE_RECURSIVE case. 21470Sstevel@tonic-gate * The differences are the following 21480Sstevel@tonic-gate * i. We want the interface ire only, so we call 21490Sstevel@tonic-gate * ire_ftable_lookup_v6() instead of ire_route_lookup_v6() 21500Sstevel@tonic-gate * ii. We look for only prefix routes in the 1st call below. 21510Sstevel@tonic-gate * ii. We want to match on the ihandle in the 2nd call below. 
21520Sstevel@tonic-gate */ 21530Sstevel@tonic-gate match_flags = MATCH_IRE_TYPE; 21540Sstevel@tonic-gate if (pire->ire_ipif != NULL) 2155*8485SPeter.Memishian@Sun.COM match_flags |= MATCH_IRE_ILL; 21560Sstevel@tonic-gate 21570Sstevel@tonic-gate mutex_enter(&pire->ire_lock); 21580Sstevel@tonic-gate gw_addr = pire->ire_gateway_addr_v6; 21590Sstevel@tonic-gate mutex_exit(&pire->ire_lock); 21600Sstevel@tonic-gate ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_OFFSUBNET, 21613448Sdh155122 pire->ire_ipif, NULL, ALL_ZONES, 0, NULL, match_flags, ipst); 21620Sstevel@tonic-gate if (ire == NULL) 21630Sstevel@tonic-gate return (NULL); 21640Sstevel@tonic-gate /* 21650Sstevel@tonic-gate * At this point 'ire' corresponds to the entry shown in line 2. 21660Sstevel@tonic-gate * gw_addr is 'gw2' in the example above. 21670Sstevel@tonic-gate */ 21680Sstevel@tonic-gate mutex_enter(&ire->ire_lock); 21690Sstevel@tonic-gate gw_addr = ire->ire_gateway_addr_v6; 21700Sstevel@tonic-gate mutex_exit(&ire->ire_lock); 21710Sstevel@tonic-gate gw_ipif = ire->ire_ipif; 21720Sstevel@tonic-gate ire_refrele(ire); 21730Sstevel@tonic-gate 21740Sstevel@tonic-gate match_flags |= MATCH_IRE_IHANDLE; 21750Sstevel@tonic-gate ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_INTERFACE, 21761676Sjpk gw_ipif, NULL, ALL_ZONES, cire->ire_ihandle, 21773448Sdh155122 NULL, match_flags, ipst); 21780Sstevel@tonic-gate return (ire); 21790Sstevel@tonic-gate } 21800Sstevel@tonic-gate 21810Sstevel@tonic-gate /* 21820Sstevel@tonic-gate * Return the IRE_LOOPBACK, IRE_IF_RESOLVER or IRE_IF_NORESOLVER 21830Sstevel@tonic-gate * ire associated with the specified ipif. 21840Sstevel@tonic-gate * 21850Sstevel@tonic-gate * This might occasionally be called when IPIF_UP is not set since 21860Sstevel@tonic-gate * the IPV6_MULTICAST_IF as well as creating interface routes 21870Sstevel@tonic-gate * allows specifying a down ipif (ipif_lookup* match ipifs that are down). 
21880Sstevel@tonic-gate * 21890Sstevel@tonic-gate * Note that if IPIF_NOLOCAL, IPIF_NOXMIT, or IPIF_DEPRECATED is set on 21900Sstevel@tonic-gate * the ipif this routine might return NULL. 21910Sstevel@tonic-gate * (Sometimes called as writer though not required by this function.) 21920Sstevel@tonic-gate */ 21930Sstevel@tonic-gate ire_t * 21941676Sjpk ipif_to_ire_v6(const ipif_t *ipif) 21950Sstevel@tonic-gate { 21960Sstevel@tonic-gate ire_t *ire; 2197*8485SPeter.Memishian@Sun.COM ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 2198*8485SPeter.Memishian@Sun.COM uint_t match_flags = MATCH_IRE_TYPE | MATCH_IRE_IPIF; 2199*8485SPeter.Memishian@Sun.COM 2200*8485SPeter.Memishian@Sun.COM /* 2201*8485SPeter.Memishian@Sun.COM * IRE_INTERFACE entries for ills under IPMP are IRE_MARK_TESTHIDDEN 2202*8485SPeter.Memishian@Sun.COM * so that they aren't accidentally returned. However, if the 2203*8485SPeter.Memishian@Sun.COM * caller's ipif is on an ill under IPMP, there's no need to hide 'em. 2204*8485SPeter.Memishian@Sun.COM */ 2205*8485SPeter.Memishian@Sun.COM if (IS_UNDER_IPMP(ipif->ipif_ill)) 2206*8485SPeter.Memishian@Sun.COM match_flags |= MATCH_IRE_MARK_TESTHIDDEN; 22070Sstevel@tonic-gate 22080Sstevel@tonic-gate ASSERT(ipif->ipif_isv6); 22090Sstevel@tonic-gate if (ipif->ipif_ire_type == IRE_LOOPBACK) { 22100Sstevel@tonic-gate ire = ire_ctable_lookup_v6(&ipif->ipif_v6lcl_addr, NULL, 2211*8485SPeter.Memishian@Sun.COM IRE_LOOPBACK, ipif, ALL_ZONES, NULL, match_flags, ipst); 22120Sstevel@tonic-gate } else if (ipif->ipif_flags & IPIF_POINTOPOINT) { 22130Sstevel@tonic-gate /* In this case we need to lookup destination address. 
*/ 22140Sstevel@tonic-gate ire = ire_ftable_lookup_v6(&ipif->ipif_v6pp_dst_addr, 22150Sstevel@tonic-gate &ipv6_all_ones, NULL, IRE_INTERFACE, ipif, NULL, ALL_ZONES, 2216*8485SPeter.Memishian@Sun.COM 0, NULL, (match_flags | MATCH_IRE_MASK), ipst); 22170Sstevel@tonic-gate } else { 22180Sstevel@tonic-gate ire = ire_ftable_lookup_v6(&ipif->ipif_v6subnet, 22190Sstevel@tonic-gate &ipif->ipif_v6net_mask, NULL, IRE_INTERFACE, ipif, NULL, 2220*8485SPeter.Memishian@Sun.COM ALL_ZONES, 0, NULL, (match_flags | MATCH_IRE_MASK), ipst); 22210Sstevel@tonic-gate } 22220Sstevel@tonic-gate return (ire); 22230Sstevel@tonic-gate } 22240Sstevel@tonic-gate 22250Sstevel@tonic-gate /* 22260Sstevel@tonic-gate * Return B_TRUE if a multirt route is resolvable 22270Sstevel@tonic-gate * (or if no route is resolved yet), B_FALSE otherwise. 22280Sstevel@tonic-gate * This only works in the global zone. 22290Sstevel@tonic-gate */ 22300Sstevel@tonic-gate boolean_t 22313448Sdh155122 ire_multirt_need_resolve_v6(const in6_addr_t *v6dstp, const ts_label_t *tsl, 22323448Sdh155122 ip_stack_t *ipst) 22330Sstevel@tonic-gate { 22340Sstevel@tonic-gate ire_t *first_fire; 22350Sstevel@tonic-gate ire_t *first_cire; 22360Sstevel@tonic-gate ire_t *fire; 22370Sstevel@tonic-gate ire_t *cire; 22380Sstevel@tonic-gate irb_t *firb; 22390Sstevel@tonic-gate irb_t *cirb; 22400Sstevel@tonic-gate int unres_cnt = 0; 22410Sstevel@tonic-gate boolean_t resolvable = B_FALSE; 22420Sstevel@tonic-gate 22430Sstevel@tonic-gate /* Retrieve the first IRE_HOST that matches the destination */ 22440Sstevel@tonic-gate first_fire = ire_ftable_lookup_v6(v6dstp, &ipv6_all_ones, 0, IRE_HOST, 22451676Sjpk NULL, NULL, ALL_ZONES, 0, tsl, MATCH_IRE_MASK | MATCH_IRE_TYPE | 22463448Sdh155122 MATCH_IRE_SECATTR, ipst); 22470Sstevel@tonic-gate 22480Sstevel@tonic-gate /* No route at all */ 22490Sstevel@tonic-gate if (first_fire == NULL) { 22500Sstevel@tonic-gate return (B_TRUE); 22510Sstevel@tonic-gate } 22520Sstevel@tonic-gate 22530Sstevel@tonic-gate 
firb = first_fire->ire_bucket; 22540Sstevel@tonic-gate ASSERT(firb); 22550Sstevel@tonic-gate 22560Sstevel@tonic-gate /* Retrieve the first IRE_CACHE ire for that destination. */ 22573448Sdh155122 first_cire = ire_cache_lookup_v6(v6dstp, GLOBAL_ZONEID, tsl, ipst); 22580Sstevel@tonic-gate 22590Sstevel@tonic-gate /* No resolved route. */ 22600Sstevel@tonic-gate if (first_cire == NULL) { 22610Sstevel@tonic-gate ire_refrele(first_fire); 22620Sstevel@tonic-gate return (B_TRUE); 22630Sstevel@tonic-gate } 22640Sstevel@tonic-gate 22650Sstevel@tonic-gate /* At least one route is resolved. */ 22660Sstevel@tonic-gate 22670Sstevel@tonic-gate cirb = first_cire->ire_bucket; 22680Sstevel@tonic-gate ASSERT(cirb); 22690Sstevel@tonic-gate 22700Sstevel@tonic-gate /* Count the number of routes to that dest that are declared. */ 22710Sstevel@tonic-gate IRB_REFHOLD(firb); 22720Sstevel@tonic-gate for (fire = first_fire; fire != NULL; fire = fire->ire_next) { 22730Sstevel@tonic-gate if (!(fire->ire_flags & RTF_MULTIRT)) 22740Sstevel@tonic-gate continue; 22750Sstevel@tonic-gate if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, v6dstp)) 22760Sstevel@tonic-gate continue; 22770Sstevel@tonic-gate unres_cnt++; 22780Sstevel@tonic-gate } 22790Sstevel@tonic-gate IRB_REFRELE(firb); 22800Sstevel@tonic-gate 22810Sstevel@tonic-gate 22820Sstevel@tonic-gate /* Then subtract the number of routes to that dst that are resolved */ 22830Sstevel@tonic-gate IRB_REFHOLD(cirb); 22840Sstevel@tonic-gate for (cire = first_cire; cire != NULL; cire = cire->ire_next) { 22854714Ssowmini if (!(cire->ire_flags & RTF_MULTIRT)) 22864714Ssowmini continue; 22874714Ssowmini if (!IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, v6dstp)) 22884714Ssowmini continue; 2289*8485SPeter.Memishian@Sun.COM if (cire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_TESTHIDDEN)) 22904714Ssowmini continue; 22914714Ssowmini unres_cnt--; 22920Sstevel@tonic-gate } 22930Sstevel@tonic-gate IRB_REFRELE(cirb); 22940Sstevel@tonic-gate 22950Sstevel@tonic-gate /* At least 
one route is unresolved; search for a resolvable route. */ 22960Sstevel@tonic-gate if (unres_cnt > 0) 22970Sstevel@tonic-gate resolvable = ire_multirt_lookup_v6(&first_cire, &first_fire, 22983448Sdh155122 MULTIRT_USESTAMP|MULTIRT_CACHEGW, tsl, ipst); 22990Sstevel@tonic-gate 23000Sstevel@tonic-gate if (first_fire) 23010Sstevel@tonic-gate ire_refrele(first_fire); 23020Sstevel@tonic-gate 23030Sstevel@tonic-gate if (first_cire) 23040Sstevel@tonic-gate ire_refrele(first_cire); 23050Sstevel@tonic-gate 23060Sstevel@tonic-gate return (resolvable); 23070Sstevel@tonic-gate } 23080Sstevel@tonic-gate 23090Sstevel@tonic-gate 23100Sstevel@tonic-gate /* 23110Sstevel@tonic-gate * Return B_TRUE and update *ire_arg and *fire_arg 23120Sstevel@tonic-gate * if at least one resolvable route is found. 23130Sstevel@tonic-gate * Return B_FALSE otherwise (all routes are resolved or 23140Sstevel@tonic-gate * the remaining unresolved routes are all unresolvable). 23150Sstevel@tonic-gate * This only works in the global zone. 
23160Sstevel@tonic-gate */ 23170Sstevel@tonic-gate boolean_t 23181676Sjpk ire_multirt_lookup_v6(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags, 23193448Sdh155122 const ts_label_t *tsl, ip_stack_t *ipst) 23200Sstevel@tonic-gate { 23210Sstevel@tonic-gate clock_t delta; 23220Sstevel@tonic-gate ire_t *best_fire = NULL; 23230Sstevel@tonic-gate ire_t *best_cire = NULL; 23240Sstevel@tonic-gate ire_t *first_fire; 23250Sstevel@tonic-gate ire_t *first_cire; 23260Sstevel@tonic-gate ire_t *fire; 23270Sstevel@tonic-gate ire_t *cire; 23280Sstevel@tonic-gate irb_t *firb = NULL; 23290Sstevel@tonic-gate irb_t *cirb = NULL; 23300Sstevel@tonic-gate ire_t *gw_ire; 23310Sstevel@tonic-gate boolean_t already_resolved; 23320Sstevel@tonic-gate boolean_t res; 23330Sstevel@tonic-gate in6_addr_t v6dst; 23340Sstevel@tonic-gate in6_addr_t v6gw; 23350Sstevel@tonic-gate 23360Sstevel@tonic-gate ip2dbg(("ire_multirt_lookup_v6: *ire_arg %p, *fire_arg %p, " 23370Sstevel@tonic-gate "flags %04x\n", (void *)*ire_arg, (void *)*fire_arg, flags)); 23380Sstevel@tonic-gate 23390Sstevel@tonic-gate ASSERT(ire_arg); 23400Sstevel@tonic-gate ASSERT(fire_arg); 23410Sstevel@tonic-gate 23420Sstevel@tonic-gate /* Not an IRE_HOST ire; give up. */ 23430Sstevel@tonic-gate if ((*fire_arg == NULL) || 23440Sstevel@tonic-gate ((*fire_arg)->ire_type != IRE_HOST)) { 23450Sstevel@tonic-gate return (B_FALSE); 23460Sstevel@tonic-gate } 23470Sstevel@tonic-gate 23480Sstevel@tonic-gate /* This is the first IRE_HOST ire for that destination. 
*/ 23490Sstevel@tonic-gate first_fire = *fire_arg; 23500Sstevel@tonic-gate firb = first_fire->ire_bucket; 23510Sstevel@tonic-gate ASSERT(firb); 23520Sstevel@tonic-gate 23530Sstevel@tonic-gate mutex_enter(&first_fire->ire_lock); 23540Sstevel@tonic-gate v6dst = first_fire->ire_addr_v6; 23550Sstevel@tonic-gate mutex_exit(&first_fire->ire_lock); 23560Sstevel@tonic-gate 23570Sstevel@tonic-gate ip2dbg(("ire_multirt_lookup_v6: dst %08x\n", 23580Sstevel@tonic-gate ntohl(V4_PART_OF_V6(v6dst)))); 23590Sstevel@tonic-gate 23600Sstevel@tonic-gate /* 23610Sstevel@tonic-gate * Retrieve the first IRE_CACHE ire for that destination; 23620Sstevel@tonic-gate * if we don't find one, no route for that dest is 23630Sstevel@tonic-gate * resolved yet. 23640Sstevel@tonic-gate */ 23653448Sdh155122 first_cire = ire_cache_lookup_v6(&v6dst, GLOBAL_ZONEID, tsl, ipst); 23660Sstevel@tonic-gate if (first_cire) { 23670Sstevel@tonic-gate cirb = first_cire->ire_bucket; 23680Sstevel@tonic-gate } 23690Sstevel@tonic-gate 23700Sstevel@tonic-gate ip2dbg(("ire_multirt_lookup_v6: first_cire %p\n", (void *)first_cire)); 23710Sstevel@tonic-gate 23720Sstevel@tonic-gate /* 23730Sstevel@tonic-gate * Search for a resolvable route, giving the top priority 23740Sstevel@tonic-gate * to routes that can be resolved without any call to the resolver. 23750Sstevel@tonic-gate */ 23760Sstevel@tonic-gate IRB_REFHOLD(firb); 23770Sstevel@tonic-gate 23780Sstevel@tonic-gate if (!IN6_IS_ADDR_MULTICAST(&v6dst)) { 23790Sstevel@tonic-gate /* 23800Sstevel@tonic-gate * For all multiroute IRE_HOST ires for that destination, 23810Sstevel@tonic-gate * check if the route via the IRE_HOST's gateway is 23820Sstevel@tonic-gate * resolved yet. 
23830Sstevel@tonic-gate */ 23840Sstevel@tonic-gate for (fire = first_fire; fire != NULL; fire = fire->ire_next) { 23850Sstevel@tonic-gate 23860Sstevel@tonic-gate if (!(fire->ire_flags & RTF_MULTIRT)) 23870Sstevel@tonic-gate continue; 23880Sstevel@tonic-gate if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst)) 23890Sstevel@tonic-gate continue; 23900Sstevel@tonic-gate 23911676Sjpk if (fire->ire_gw_secattr != NULL && 23921676Sjpk tsol_ire_match_gwattr(fire, tsl) != 0) { 23931676Sjpk continue; 23941676Sjpk } 23951676Sjpk 23960Sstevel@tonic-gate mutex_enter(&fire->ire_lock); 23970Sstevel@tonic-gate v6gw = fire->ire_gateway_addr_v6; 23980Sstevel@tonic-gate mutex_exit(&fire->ire_lock); 23990Sstevel@tonic-gate 24000Sstevel@tonic-gate ip2dbg(("ire_multirt_lookup_v6: fire %p, " 24010Sstevel@tonic-gate "ire_addr %08x, ire_gateway_addr %08x\n", 24020Sstevel@tonic-gate (void *)fire, 24030Sstevel@tonic-gate ntohl(V4_PART_OF_V6(fire->ire_addr_v6)), 24040Sstevel@tonic-gate ntohl(V4_PART_OF_V6(v6gw)))); 24050Sstevel@tonic-gate 24060Sstevel@tonic-gate already_resolved = B_FALSE; 24070Sstevel@tonic-gate 24080Sstevel@tonic-gate if (first_cire) { 24090Sstevel@tonic-gate ASSERT(cirb); 24100Sstevel@tonic-gate 24110Sstevel@tonic-gate IRB_REFHOLD(cirb); 24120Sstevel@tonic-gate /* 24130Sstevel@tonic-gate * For all IRE_CACHE ires for that 24140Sstevel@tonic-gate * destination. 
24150Sstevel@tonic-gate */ 24160Sstevel@tonic-gate for (cire = first_cire; 24170Sstevel@tonic-gate cire != NULL; 24180Sstevel@tonic-gate cire = cire->ire_next) { 24190Sstevel@tonic-gate 24200Sstevel@tonic-gate if (!(cire->ire_flags & RTF_MULTIRT)) 24210Sstevel@tonic-gate continue; 24220Sstevel@tonic-gate if (!IN6_ARE_ADDR_EQUAL( 24230Sstevel@tonic-gate &cire->ire_addr_v6, &v6dst)) 24240Sstevel@tonic-gate continue; 24250Sstevel@tonic-gate if (cire->ire_marks & 24260Sstevel@tonic-gate (IRE_MARK_CONDEMNED| 2427*8485SPeter.Memishian@Sun.COM IRE_MARK_TESTHIDDEN)) 24280Sstevel@tonic-gate continue; 24291676Sjpk 24301676Sjpk if (cire->ire_gw_secattr != NULL && 24311676Sjpk tsol_ire_match_gwattr(cire, 24321676Sjpk tsl) != 0) { 24331676Sjpk continue; 24341676Sjpk } 24351676Sjpk 24360Sstevel@tonic-gate /* 24370Sstevel@tonic-gate * Check if the IRE_CACHE's gateway 24380Sstevel@tonic-gate * matches the IRE_HOST's gateway. 24390Sstevel@tonic-gate */ 24400Sstevel@tonic-gate if (IN6_ARE_ADDR_EQUAL( 24410Sstevel@tonic-gate &cire->ire_gateway_addr_v6, 24420Sstevel@tonic-gate &v6gw)) { 24430Sstevel@tonic-gate already_resolved = B_TRUE; 24440Sstevel@tonic-gate break; 24450Sstevel@tonic-gate } 24460Sstevel@tonic-gate } 24470Sstevel@tonic-gate IRB_REFRELE(cirb); 24480Sstevel@tonic-gate } 24490Sstevel@tonic-gate 24500Sstevel@tonic-gate /* 24510Sstevel@tonic-gate * This route is already resolved; 24520Sstevel@tonic-gate * proceed with next one. 24530Sstevel@tonic-gate */ 24540Sstevel@tonic-gate if (already_resolved) { 24550Sstevel@tonic-gate ip2dbg(("ire_multirt_lookup_v6: found cire %p, " 24560Sstevel@tonic-gate "already resolved\n", (void *)cire)); 24570Sstevel@tonic-gate continue; 24580Sstevel@tonic-gate } 24590Sstevel@tonic-gate 24600Sstevel@tonic-gate /* 24610Sstevel@tonic-gate * The route is unresolved; is it actually 24620Sstevel@tonic-gate * resolvable, i.e. is there a cache or a resolver 24630Sstevel@tonic-gate * for the gateway? 
24640Sstevel@tonic-gate */ 24650Sstevel@tonic-gate gw_ire = ire_route_lookup_v6(&v6gw, 0, 0, 0, NULL, NULL, 24661676Sjpk ALL_ZONES, tsl, MATCH_IRE_RECURSIVE | 24673448Sdh155122 MATCH_IRE_SECATTR, ipst); 24680Sstevel@tonic-gate 24690Sstevel@tonic-gate ip2dbg(("ire_multirt_lookup_v6: looked up gw_ire %p\n", 24700Sstevel@tonic-gate (void *)gw_ire)); 24710Sstevel@tonic-gate 24720Sstevel@tonic-gate /* 24730Sstevel@tonic-gate * This route can be resolved without any call to the 24740Sstevel@tonic-gate * resolver; if the MULTIRT_CACHEGW flag is set, 24750Sstevel@tonic-gate * give the top priority to this ire and exit the 24760Sstevel@tonic-gate * loop. 24770Sstevel@tonic-gate * This occurs when an resolver reply is processed 24780Sstevel@tonic-gate * through ip_wput_nondata() 24790Sstevel@tonic-gate */ 24800Sstevel@tonic-gate if ((flags & MULTIRT_CACHEGW) && 24810Sstevel@tonic-gate (gw_ire != NULL) && 24820Sstevel@tonic-gate (gw_ire->ire_type & IRE_CACHETABLE)) { 24830Sstevel@tonic-gate /* 24840Sstevel@tonic-gate * Release the resolver associated to the 24850Sstevel@tonic-gate * previous candidate best ire, if any. 24860Sstevel@tonic-gate */ 24870Sstevel@tonic-gate if (best_cire) { 24880Sstevel@tonic-gate ire_refrele(best_cire); 24890Sstevel@tonic-gate ASSERT(best_fire); 24900Sstevel@tonic-gate } 24910Sstevel@tonic-gate 24920Sstevel@tonic-gate best_fire = fire; 24930Sstevel@tonic-gate best_cire = gw_ire; 24940Sstevel@tonic-gate 24950Sstevel@tonic-gate ip2dbg(("ire_multirt_lookup_v6: found top prio " 24960Sstevel@tonic-gate "best_fire %p, best_cire %p\n", 24970Sstevel@tonic-gate (void *)best_fire, (void *)best_cire)); 24980Sstevel@tonic-gate break; 24990Sstevel@tonic-gate } 25000Sstevel@tonic-gate 25010Sstevel@tonic-gate /* 25020Sstevel@tonic-gate * Compute the time elapsed since our preceding 25030Sstevel@tonic-gate * attempt to resolve that route. 
25040Sstevel@tonic-gate * If the MULTIRT_USESTAMP flag is set, we take that 25050Sstevel@tonic-gate * route into account only if this time interval 25060Sstevel@tonic-gate * exceeds ip_multirt_resolution_interval; 25070Sstevel@tonic-gate * this prevents us from attempting to resolve a 25080Sstevel@tonic-gate * broken route upon each sending of a packet. 25090Sstevel@tonic-gate */ 25100Sstevel@tonic-gate delta = lbolt - fire->ire_last_used_time; 25110Sstevel@tonic-gate delta = TICK_TO_MSEC(delta); 25120Sstevel@tonic-gate 25130Sstevel@tonic-gate res = (boolean_t) 25143448Sdh155122 ((delta > ipst-> 25154714Ssowmini ips_ip_multirt_resolution_interval) || 25163448Sdh155122 (!(flags & MULTIRT_USESTAMP))); 25170Sstevel@tonic-gate 25180Sstevel@tonic-gate ip2dbg(("ire_multirt_lookup_v6: fire %p, delta %lu, " 25190Sstevel@tonic-gate "res %d\n", 25200Sstevel@tonic-gate (void *)fire, delta, res)); 25210Sstevel@tonic-gate 25220Sstevel@tonic-gate if (res) { 25230Sstevel@tonic-gate /* 25240Sstevel@tonic-gate * A resolver exists for the gateway: save 25250Sstevel@tonic-gate * the current IRE_HOST ire as a candidate 25260Sstevel@tonic-gate * best ire. If we later discover that a 25270Sstevel@tonic-gate * top priority ire exists (i.e. no need to 25280Sstevel@tonic-gate * call the resolver), then this new ire 25290Sstevel@tonic-gate * will be preferred to the current one. 
25300Sstevel@tonic-gate */ 25310Sstevel@tonic-gate if (gw_ire != NULL) { 25320Sstevel@tonic-gate if (best_fire == NULL) { 25330Sstevel@tonic-gate ASSERT(best_cire == NULL); 25340Sstevel@tonic-gate 25350Sstevel@tonic-gate best_fire = fire; 25360Sstevel@tonic-gate best_cire = gw_ire; 25370Sstevel@tonic-gate 25380Sstevel@tonic-gate ip2dbg(("ire_multirt_lookup_v6:" 25390Sstevel@tonic-gate "found candidate " 25400Sstevel@tonic-gate "best_fire %p, " 25410Sstevel@tonic-gate "best_cire %p\n", 25420Sstevel@tonic-gate (void *)best_fire, 25430Sstevel@tonic-gate (void *)best_cire)); 25440Sstevel@tonic-gate 25450Sstevel@tonic-gate /* 25460Sstevel@tonic-gate * If MULTIRT_CACHEGW is not 25470Sstevel@tonic-gate * set, we ignore the top 25480Sstevel@tonic-gate * priority ires that can 25490Sstevel@tonic-gate * be resolved without any 25500Sstevel@tonic-gate * call to the resolver; 25510Sstevel@tonic-gate * In that case, there is 25520Sstevel@tonic-gate * actually no need 25530Sstevel@tonic-gate * to continue the loop. 25540Sstevel@tonic-gate */ 25550Sstevel@tonic-gate if (!(flags & 25560Sstevel@tonic-gate MULTIRT_CACHEGW)) { 25570Sstevel@tonic-gate break; 25580Sstevel@tonic-gate } 25590Sstevel@tonic-gate continue; 25600Sstevel@tonic-gate } 25610Sstevel@tonic-gate } else { 25620Sstevel@tonic-gate /* 25630Sstevel@tonic-gate * No resolver for the gateway: the 25640Sstevel@tonic-gate * route is not resolvable. 25650Sstevel@tonic-gate * If the MULTIRT_SETSTAMP flag is 25660Sstevel@tonic-gate * set, we stamp the IRE_HOST ire, 25670Sstevel@tonic-gate * so we will not select it again 25680Sstevel@tonic-gate * during this resolution interval. 
25690Sstevel@tonic-gate */ 25700Sstevel@tonic-gate if (flags & MULTIRT_SETSTAMP) 25710Sstevel@tonic-gate fire->ire_last_used_time = 25720Sstevel@tonic-gate lbolt; 25730Sstevel@tonic-gate } 25740Sstevel@tonic-gate } 25750Sstevel@tonic-gate 25760Sstevel@tonic-gate if (gw_ire != NULL) 25770Sstevel@tonic-gate ire_refrele(gw_ire); 25780Sstevel@tonic-gate } 25790Sstevel@tonic-gate } else { /* IN6_IS_ADDR_MULTICAST(&v6dst) */ 25800Sstevel@tonic-gate 25810Sstevel@tonic-gate for (fire = first_fire; 25820Sstevel@tonic-gate fire != NULL; 25830Sstevel@tonic-gate fire = fire->ire_next) { 25840Sstevel@tonic-gate 25850Sstevel@tonic-gate if (!(fire->ire_flags & RTF_MULTIRT)) 25860Sstevel@tonic-gate continue; 25870Sstevel@tonic-gate if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst)) 25880Sstevel@tonic-gate continue; 25890Sstevel@tonic-gate 25901676Sjpk if (fire->ire_gw_secattr != NULL && 25911676Sjpk tsol_ire_match_gwattr(fire, tsl) != 0) { 25921676Sjpk continue; 25931676Sjpk } 25941676Sjpk 25950Sstevel@tonic-gate already_resolved = B_FALSE; 25960Sstevel@tonic-gate 25970Sstevel@tonic-gate mutex_enter(&fire->ire_lock); 25980Sstevel@tonic-gate v6gw = fire->ire_gateway_addr_v6; 25990Sstevel@tonic-gate mutex_exit(&fire->ire_lock); 26000Sstevel@tonic-gate 26010Sstevel@tonic-gate gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0, 26021676Sjpk IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, tsl, 26031676Sjpk MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE | 26043448Sdh155122 MATCH_IRE_SECATTR, ipst); 26050Sstevel@tonic-gate 26060Sstevel@tonic-gate /* No resolver for the gateway; we skip this ire. */ 26070Sstevel@tonic-gate if (gw_ire == NULL) { 26080Sstevel@tonic-gate continue; 26090Sstevel@tonic-gate } 26100Sstevel@tonic-gate 26110Sstevel@tonic-gate if (first_cire) { 26120Sstevel@tonic-gate 26130Sstevel@tonic-gate IRB_REFHOLD(cirb); 26140Sstevel@tonic-gate /* 26150Sstevel@tonic-gate * For all IRE_CACHE ires for that 26160Sstevel@tonic-gate * destination. 
26170Sstevel@tonic-gate */ 26180Sstevel@tonic-gate for (cire = first_cire; 26190Sstevel@tonic-gate cire != NULL; 26200Sstevel@tonic-gate cire = cire->ire_next) { 26210Sstevel@tonic-gate 26220Sstevel@tonic-gate if (!(cire->ire_flags & RTF_MULTIRT)) 26230Sstevel@tonic-gate continue; 26240Sstevel@tonic-gate if (!IN6_ARE_ADDR_EQUAL( 26250Sstevel@tonic-gate &cire->ire_addr_v6, &v6dst)) 26260Sstevel@tonic-gate continue; 26270Sstevel@tonic-gate if (cire->ire_marks & 2628*8485SPeter.Memishian@Sun.COM IRE_MARK_CONDEMNED) 26290Sstevel@tonic-gate continue; 26301676Sjpk 26311676Sjpk if (cire->ire_gw_secattr != NULL && 26321676Sjpk tsol_ire_match_gwattr(cire, 26331676Sjpk tsl) != 0) { 26341676Sjpk continue; 26351676Sjpk } 26361676Sjpk 26370Sstevel@tonic-gate /* 26380Sstevel@tonic-gate * Cache entries are linked to the 26390Sstevel@tonic-gate * parent routes using the parent handle 26400Sstevel@tonic-gate * (ire_phandle). If no cache entry has 26410Sstevel@tonic-gate * the same handle as fire, fire is 26420Sstevel@tonic-gate * still unresolved. 26430Sstevel@tonic-gate */ 26440Sstevel@tonic-gate ASSERT(cire->ire_phandle != 0); 26450Sstevel@tonic-gate if (cire->ire_phandle == 26460Sstevel@tonic-gate fire->ire_phandle) { 26470Sstevel@tonic-gate already_resolved = B_TRUE; 26480Sstevel@tonic-gate break; 26490Sstevel@tonic-gate } 26500Sstevel@tonic-gate } 26510Sstevel@tonic-gate IRB_REFRELE(cirb); 26520Sstevel@tonic-gate } 26530Sstevel@tonic-gate 26540Sstevel@tonic-gate /* 26550Sstevel@tonic-gate * This route is already resolved; proceed with 26560Sstevel@tonic-gate * next one. 26570Sstevel@tonic-gate */ 26580Sstevel@tonic-gate if (already_resolved) { 26590Sstevel@tonic-gate ire_refrele(gw_ire); 26600Sstevel@tonic-gate continue; 26610Sstevel@tonic-gate } 26620Sstevel@tonic-gate 26630Sstevel@tonic-gate /* 26640Sstevel@tonic-gate * Compute the time elapsed since our preceding 26650Sstevel@tonic-gate * attempt to resolve that route. 
26660Sstevel@tonic-gate * If the MULTIRT_USESTAMP flag is set, we take 26670Sstevel@tonic-gate * that route into account only if this time 26680Sstevel@tonic-gate * interval exceeds ip_multirt_resolution_interval; 26690Sstevel@tonic-gate * this prevents us from attempting to resolve a 26700Sstevel@tonic-gate * broken route upon each sending of a packet. 26710Sstevel@tonic-gate */ 26720Sstevel@tonic-gate delta = lbolt - fire->ire_last_used_time; 26730Sstevel@tonic-gate delta = TICK_TO_MSEC(delta); 26740Sstevel@tonic-gate 26750Sstevel@tonic-gate res = (boolean_t) 26763448Sdh155122 ((delta > ipst-> 26774714Ssowmini ips_ip_multirt_resolution_interval) || 26780Sstevel@tonic-gate (!(flags & MULTIRT_USESTAMP))); 26790Sstevel@tonic-gate 26800Sstevel@tonic-gate ip3dbg(("ire_multirt_lookup_v6: fire %p, delta %lx, " 26810Sstevel@tonic-gate "flags %04x, res %d\n", 26820Sstevel@tonic-gate (void *)fire, delta, flags, res)); 26830Sstevel@tonic-gate 26840Sstevel@tonic-gate if (res) { 26850Sstevel@tonic-gate if (best_cire) { 26860Sstevel@tonic-gate /* 26870Sstevel@tonic-gate * Release the resolver associated 26880Sstevel@tonic-gate * to the preceding candidate best 26890Sstevel@tonic-gate * ire, if any. 26900Sstevel@tonic-gate */ 26910Sstevel@tonic-gate ire_refrele(best_cire); 26920Sstevel@tonic-gate ASSERT(best_fire); 26930Sstevel@tonic-gate } 26940Sstevel@tonic-gate best_fire = fire; 26950Sstevel@tonic-gate best_cire = gw_ire; 26960Sstevel@tonic-gate continue; 26970Sstevel@tonic-gate } 26980Sstevel@tonic-gate 26990Sstevel@tonic-gate ire_refrele(gw_ire); 27000Sstevel@tonic-gate } 27010Sstevel@tonic-gate } 27020Sstevel@tonic-gate 27030Sstevel@tonic-gate if (best_fire) { 27040Sstevel@tonic-gate IRE_REFHOLD(best_fire); 27050Sstevel@tonic-gate } 27060Sstevel@tonic-gate IRB_REFRELE(firb); 27070Sstevel@tonic-gate 27080Sstevel@tonic-gate /* Release the first IRE_CACHE we initially looked up, if any. 
*/ 27090Sstevel@tonic-gate if (first_cire) 27100Sstevel@tonic-gate ire_refrele(first_cire); 27110Sstevel@tonic-gate 27120Sstevel@tonic-gate /* Found a resolvable route. */ 27130Sstevel@tonic-gate if (best_fire) { 27140Sstevel@tonic-gate ASSERT(best_cire); 27150Sstevel@tonic-gate 27160Sstevel@tonic-gate if (*fire_arg) 27170Sstevel@tonic-gate ire_refrele(*fire_arg); 27180Sstevel@tonic-gate if (*ire_arg) 27190Sstevel@tonic-gate ire_refrele(*ire_arg); 27200Sstevel@tonic-gate 27210Sstevel@tonic-gate /* 27220Sstevel@tonic-gate * Update the passed arguments with the 27230Sstevel@tonic-gate * resolvable multirt route we found 27240Sstevel@tonic-gate */ 27250Sstevel@tonic-gate *fire_arg = best_fire; 27260Sstevel@tonic-gate *ire_arg = best_cire; 27270Sstevel@tonic-gate 27280Sstevel@tonic-gate ip2dbg(("ire_multirt_lookup_v6: returning B_TRUE, " 27290Sstevel@tonic-gate "*fire_arg %p, *ire_arg %p\n", 27300Sstevel@tonic-gate (void *)best_fire, (void *)best_cire)); 27310Sstevel@tonic-gate 27320Sstevel@tonic-gate return (B_TRUE); 27330Sstevel@tonic-gate } 27340Sstevel@tonic-gate 27350Sstevel@tonic-gate ASSERT(best_cire == NULL); 27360Sstevel@tonic-gate 27370Sstevel@tonic-gate ip2dbg(("ire_multirt_lookup_v6: returning B_FALSE, *fire_arg %p, " 27380Sstevel@tonic-gate "*ire_arg %p\n", 27390Sstevel@tonic-gate (void *)*fire_arg, (void *)*ire_arg)); 27400Sstevel@tonic-gate 27410Sstevel@tonic-gate /* No resolvable route. */ 27420Sstevel@tonic-gate return (B_FALSE); 27430Sstevel@tonic-gate } 27440Sstevel@tonic-gate 27450Sstevel@tonic-gate 27460Sstevel@tonic-gate /* 27470Sstevel@tonic-gate * Find an IRE_OFFSUBNET IRE entry for the multicast address 'v6dstp' 27480Sstevel@tonic-gate * that goes through 'ipif'. As a fallback, a route that goes through 27490Sstevel@tonic-gate * ipif->ipif_ill can be returned. 
27500Sstevel@tonic-gate */ 27510Sstevel@tonic-gate ire_t * 27520Sstevel@tonic-gate ipif_lookup_multi_ire_v6(ipif_t *ipif, const in6_addr_t *v6dstp) 27530Sstevel@tonic-gate { 27540Sstevel@tonic-gate ire_t *ire; 27550Sstevel@tonic-gate ire_t *save_ire = NULL; 27560Sstevel@tonic-gate ire_t *gw_ire; 27570Sstevel@tonic-gate irb_t *irb; 27580Sstevel@tonic-gate in6_addr_t v6gw; 27590Sstevel@tonic-gate int match_flags = MATCH_IRE_TYPE | MATCH_IRE_ILL; 27603448Sdh155122 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 27610Sstevel@tonic-gate 27620Sstevel@tonic-gate ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, NULL, NULL, ALL_ZONES, 0, 27633448Sdh155122 NULL, MATCH_IRE_DEFAULT, ipst); 27640Sstevel@tonic-gate 27650Sstevel@tonic-gate if (ire == NULL) 27660Sstevel@tonic-gate return (NULL); 27670Sstevel@tonic-gate 27680Sstevel@tonic-gate irb = ire->ire_bucket; 27690Sstevel@tonic-gate ASSERT(irb); 27700Sstevel@tonic-gate 27710Sstevel@tonic-gate IRB_REFHOLD(irb); 27720Sstevel@tonic-gate ire_refrele(ire); 27730Sstevel@tonic-gate for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 27740Sstevel@tonic-gate if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) || 27751676Sjpk (ipif->ipif_zoneid != ire->ire_zoneid && 27761676Sjpk ire->ire_zoneid != ALL_ZONES)) { 27770Sstevel@tonic-gate continue; 27780Sstevel@tonic-gate } 27790Sstevel@tonic-gate 27800Sstevel@tonic-gate switch (ire->ire_type) { 27810Sstevel@tonic-gate case IRE_DEFAULT: 27820Sstevel@tonic-gate case IRE_PREFIX: 27830Sstevel@tonic-gate case IRE_HOST: 27840Sstevel@tonic-gate mutex_enter(&ire->ire_lock); 27850Sstevel@tonic-gate v6gw = ire->ire_gateway_addr_v6; 27860Sstevel@tonic-gate mutex_exit(&ire->ire_lock); 27870Sstevel@tonic-gate gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0, 27880Sstevel@tonic-gate IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0, 27893448Sdh155122 NULL, match_flags, ipst); 27900Sstevel@tonic-gate 27910Sstevel@tonic-gate if (gw_ire != NULL) { 27920Sstevel@tonic-gate if (save_ire != NULL) { 27930Sstevel@tonic-gate 
ire_refrele(save_ire); 27940Sstevel@tonic-gate } 27950Sstevel@tonic-gate IRE_REFHOLD(ire); 27960Sstevel@tonic-gate if (gw_ire->ire_ipif == ipif) { 27970Sstevel@tonic-gate ire_refrele(gw_ire); 27980Sstevel@tonic-gate 27990Sstevel@tonic-gate IRB_REFRELE(irb); 28000Sstevel@tonic-gate return (ire); 28010Sstevel@tonic-gate } 28020Sstevel@tonic-gate ire_refrele(gw_ire); 28030Sstevel@tonic-gate save_ire = ire; 28040Sstevel@tonic-gate } 28050Sstevel@tonic-gate break; 28060Sstevel@tonic-gate case IRE_IF_NORESOLVER: 28070Sstevel@tonic-gate case IRE_IF_RESOLVER: 28080Sstevel@tonic-gate if (ire->ire_ipif == ipif) { 28090Sstevel@tonic-gate if (save_ire != NULL) { 28100Sstevel@tonic-gate ire_refrele(save_ire); 28110Sstevel@tonic-gate } 28120Sstevel@tonic-gate IRE_REFHOLD(ire); 28130Sstevel@tonic-gate 28140Sstevel@tonic-gate IRB_REFRELE(irb); 28150Sstevel@tonic-gate return (ire); 28160Sstevel@tonic-gate } 28170Sstevel@tonic-gate break; 28180Sstevel@tonic-gate } 28190Sstevel@tonic-gate } 28200Sstevel@tonic-gate IRB_REFRELE(irb); 28210Sstevel@tonic-gate 28220Sstevel@tonic-gate return (save_ire); 28230Sstevel@tonic-gate } 28247880SJonathan.Anderson@Sun.COM 28257880SJonathan.Anderson@Sun.COM /* 28267880SJonathan.Anderson@Sun.COM * This is the implementation of the IPv6 IRE cache lookup procedure. 28277880SJonathan.Anderson@Sun.COM * Separating the interface from the implementation allows additional 28287880SJonathan.Anderson@Sun.COM * flexibility when specifying search criteria. 
28297880SJonathan.Anderson@Sun.COM */ 28307880SJonathan.Anderson@Sun.COM static ire_t * 28317880SJonathan.Anderson@Sun.COM ip6_ctable_lookup_impl(ire_ctable_args_t *margs) 28327880SJonathan.Anderson@Sun.COM { 28337880SJonathan.Anderson@Sun.COM irb_t *irb_ptr; 28347880SJonathan.Anderson@Sun.COM ire_t *ire; 28357880SJonathan.Anderson@Sun.COM ip_stack_t *ipst = margs->ict_ipst; 28367880SJonathan.Anderson@Sun.COM 2837*8485SPeter.Memishian@Sun.COM if ((margs->ict_flags & (MATCH_IRE_SRC | MATCH_IRE_ILL)) && 28387880SJonathan.Anderson@Sun.COM (margs->ict_ipif == NULL)) { 28397880SJonathan.Anderson@Sun.COM return (NULL); 28407880SJonathan.Anderson@Sun.COM } 28417880SJonathan.Anderson@Sun.COM 28427880SJonathan.Anderson@Sun.COM irb_ptr = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6( 28437880SJonathan.Anderson@Sun.COM *((in6_addr_t *)(margs->ict_addr)), 28447880SJonathan.Anderson@Sun.COM ipst->ips_ip6_cache_table_size)]; 28457880SJonathan.Anderson@Sun.COM rw_enter(&irb_ptr->irb_lock, RW_READER); 28467880SJonathan.Anderson@Sun.COM for (ire = irb_ptr->irb_ire; ire != NULL; ire = ire->ire_next) { 28477880SJonathan.Anderson@Sun.COM if (ire->ire_marks & IRE_MARK_CONDEMNED) 28487880SJonathan.Anderson@Sun.COM continue; 28497880SJonathan.Anderson@Sun.COM ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones)); 28507880SJonathan.Anderson@Sun.COM if (ire_match_args_v6(ire, (in6_addr_t *)margs->ict_addr, 28517880SJonathan.Anderson@Sun.COM &ire->ire_mask_v6, (in6_addr_t *)margs->ict_gateway, 28527880SJonathan.Anderson@Sun.COM margs->ict_type, margs->ict_ipif, margs->ict_zoneid, 0, 28537880SJonathan.Anderson@Sun.COM margs->ict_tsl, margs->ict_flags)) { 28547880SJonathan.Anderson@Sun.COM IRE_REFHOLD(ire); 28557880SJonathan.Anderson@Sun.COM rw_exit(&irb_ptr->irb_lock); 28567880SJonathan.Anderson@Sun.COM return (ire); 28577880SJonathan.Anderson@Sun.COM } 28587880SJonathan.Anderson@Sun.COM } 28597880SJonathan.Anderson@Sun.COM 28607880SJonathan.Anderson@Sun.COM 
rw_exit(&irb_ptr->irb_lock); 28617880SJonathan.Anderson@Sun.COM return (NULL); 28627880SJonathan.Anderson@Sun.COM } 2863