xref: /onnv-gate/usr/src/uts/common/inet/ip/ip6_ire.c (revision 4333:8f4984b4f93e)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51676Sjpk  * Common Development and Distribution License (the "License").
61676Sjpk  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
223448Sdh155122  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate /*
260Sstevel@tonic-gate  * Copyright (c) 1990 Mentat Inc.
270Sstevel@tonic-gate  */
280Sstevel@tonic-gate 
290Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
300Sstevel@tonic-gate 
310Sstevel@tonic-gate /*
320Sstevel@tonic-gate  * This file contains routines that manipulate Internet Routing Entries (IREs).
330Sstevel@tonic-gate  */
340Sstevel@tonic-gate #include <sys/types.h>
350Sstevel@tonic-gate #include <sys/stream.h>
360Sstevel@tonic-gate #include <sys/stropts.h>
370Sstevel@tonic-gate #include <sys/ddi.h>
380Sstevel@tonic-gate #include <sys/cmn_err.h>
390Sstevel@tonic-gate 
400Sstevel@tonic-gate #include <sys/systm.h>
410Sstevel@tonic-gate #include <sys/param.h>
420Sstevel@tonic-gate #include <sys/socket.h>
430Sstevel@tonic-gate #include <net/if.h>
440Sstevel@tonic-gate #include <net/route.h>
450Sstevel@tonic-gate #include <netinet/in.h>
460Sstevel@tonic-gate #include <net/if_dl.h>
470Sstevel@tonic-gate #include <netinet/ip6.h>
480Sstevel@tonic-gate #include <netinet/icmp6.h>
490Sstevel@tonic-gate 
500Sstevel@tonic-gate #include <inet/common.h>
510Sstevel@tonic-gate #include <inet/mi.h>
520Sstevel@tonic-gate #include <inet/ip.h>
530Sstevel@tonic-gate #include <inet/ip6.h>
540Sstevel@tonic-gate #include <inet/ip_ndp.h>
550Sstevel@tonic-gate #include <inet/ip_if.h>
560Sstevel@tonic-gate #include <inet/ip_ire.h>
570Sstevel@tonic-gate #include <inet/ipclassifier.h>
580Sstevel@tonic-gate #include <inet/nd.h>
590Sstevel@tonic-gate #include <sys/kmem.h>
600Sstevel@tonic-gate #include <sys/zone.h>
610Sstevel@tonic-gate 
621676Sjpk #include <sys/tsol/label.h>
631676Sjpk #include <sys/tsol/tnet.h>
641676Sjpk 
650Sstevel@tonic-gate static	ire_t	ire_null;
660Sstevel@tonic-gate 
670Sstevel@tonic-gate static ire_t	*ire_ihandle_lookup_onlink_v6(ire_t *cire);
680Sstevel@tonic-gate static	void	ire_report_ftable_v6(ire_t *ire, char *mp);
690Sstevel@tonic-gate static	void	ire_report_ctable_v6(ire_t *ire, char *mp);
700Sstevel@tonic-gate static boolean_t ire_match_args_v6(ire_t *ire, const in6_addr_t *addr,
711676Sjpk     const in6_addr_t *mask, const in6_addr_t *gateway, int type,
721676Sjpk     const ipif_t *ipif, zoneid_t zoneid, uint32_t ihandle,
731676Sjpk     const ts_label_t *tsl, int match_flags);
740Sstevel@tonic-gate 
750Sstevel@tonic-gate /*
760Sstevel@tonic-gate  * Named Dispatch routine to produce a formatted report on all IREs.
770Sstevel@tonic-gate  * This report is accessed by using the ndd utility to "get" ND variable
780Sstevel@tonic-gate  * "ip_ire_status_v6".
790Sstevel@tonic-gate  */
800Sstevel@tonic-gate /* ARGSUSED */
810Sstevel@tonic-gate int
820Sstevel@tonic-gate ip_ire_report_v6(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr)
830Sstevel@tonic-gate {
840Sstevel@tonic-gate 	zoneid_t zoneid;
853448Sdh155122 	ip_stack_t *ipst;
860Sstevel@tonic-gate 
870Sstevel@tonic-gate 	(void) mi_mpprintf(mp,
880Sstevel@tonic-gate 	    "IRE      " MI_COL_HDRPAD_STR
890Sstevel@tonic-gate 	    "rfq      " MI_COL_HDRPAD_STR
900Sstevel@tonic-gate 	    "stq      " MI_COL_HDRPAD_STR
910Sstevel@tonic-gate 	    " zone mxfrg rtt   rtt_sd ssthresh ref "
920Sstevel@tonic-gate 	    "rtomax tstamp_ok wscale_ok ecn_ok pmtud_ok sack sendpipe recvpipe "
930Sstevel@tonic-gate 	    "in/out/forward type    addr         mask         "
940Sstevel@tonic-gate 	    "src             gateway");
950Sstevel@tonic-gate 	/*
960Sstevel@tonic-gate 	 *   01234567 01234567 01234567 12345 12345 12345 12345  12345678 123
970Sstevel@tonic-gate 	 *   123456 123456789 123456789 123456 12345678 1234 12345678 12345678
980Sstevel@tonic-gate 	 *   in/out/forward xxxxxxxxxx
990Sstevel@tonic-gate 	 *   xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx
1000Sstevel@tonic-gate 	 *   xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx
1010Sstevel@tonic-gate 	 *   xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx
1020Sstevel@tonic-gate 	 *   xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx
1030Sstevel@tonic-gate 	 */
1040Sstevel@tonic-gate 
1050Sstevel@tonic-gate 	/*
1060Sstevel@tonic-gate 	 * Because of the ndd constraint, at most we can have 64K buffer
1070Sstevel@tonic-gate 	 * to put in all IRE info.  So to be more efficient, just
1080Sstevel@tonic-gate 	 * allocate a 64K buffer here, assuming we need that large buffer.
1090Sstevel@tonic-gate 	 * This should be OK as only root can do ndd /dev/ip.
1100Sstevel@tonic-gate 	 */
1110Sstevel@tonic-gate 	if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) {
1120Sstevel@tonic-gate 		/* The following may work even if we cannot get a large buf. */
1130Sstevel@tonic-gate 		(void) mi_mpprintf(mp, "<< Out of buffer >>\n");
1140Sstevel@tonic-gate 		return (0);
1150Sstevel@tonic-gate 	}
1160Sstevel@tonic-gate 	zoneid = Q_TO_CONN(q)->conn_zoneid;
1170Sstevel@tonic-gate 	if (zoneid == GLOBAL_ZONEID)
1180Sstevel@tonic-gate 		zoneid = ALL_ZONES;
1193448Sdh155122 	ipst = CONNQ_TO_IPST(q);
1200Sstevel@tonic-gate 
1213448Sdh155122 	ire_walk_v6(ire_report_ftable_v6, (char *)mp->b_cont, zoneid, ipst);
1223448Sdh155122 	ire_walk_v6(ire_report_ctable_v6, (char *)mp->b_cont, zoneid, ipst);
1230Sstevel@tonic-gate 	return (0);
1240Sstevel@tonic-gate }
1250Sstevel@tonic-gate 
1260Sstevel@tonic-gate /*
1270Sstevel@tonic-gate  * ire_walk routine invoked for ip_ire_report_v6 for each IRE.
1280Sstevel@tonic-gate  */
1290Sstevel@tonic-gate static void
1300Sstevel@tonic-gate ire_report_ftable_v6(ire_t *ire, char *mp)
1310Sstevel@tonic-gate {
1320Sstevel@tonic-gate 	char	buf1[INET6_ADDRSTRLEN];
1330Sstevel@tonic-gate 	char	buf2[INET6_ADDRSTRLEN];
1340Sstevel@tonic-gate 	char	buf3[INET6_ADDRSTRLEN];
1350Sstevel@tonic-gate 	char	buf4[INET6_ADDRSTRLEN];
1360Sstevel@tonic-gate 	uint_t	fo_pkt_count;
1370Sstevel@tonic-gate 	uint_t	ib_pkt_count;
1380Sstevel@tonic-gate 	int	ref;
1390Sstevel@tonic-gate 	in6_addr_t gw_addr_v6;
1400Sstevel@tonic-gate 	uint_t	print_len, buf_len;
1410Sstevel@tonic-gate 
1420Sstevel@tonic-gate 	ASSERT(ire->ire_ipversion == IPV6_VERSION);
1430Sstevel@tonic-gate 	if (ire->ire_type & IRE_CACHETABLE)
1440Sstevel@tonic-gate 	    return;
1450Sstevel@tonic-gate 	buf_len = ((mblk_t *)mp)->b_datap->db_lim - ((mblk_t *)mp)->b_wptr;
1460Sstevel@tonic-gate 	if (buf_len <= 0)
1470Sstevel@tonic-gate 		return;
1480Sstevel@tonic-gate 
1490Sstevel@tonic-gate 	/* Number of active references of this ire */
1500Sstevel@tonic-gate 	ref = ire->ire_refcnt;
1510Sstevel@tonic-gate 	/* "inbound" to a non local address is a forward */
1520Sstevel@tonic-gate 	ib_pkt_count = ire->ire_ib_pkt_count;
1530Sstevel@tonic-gate 	fo_pkt_count = 0;
1540Sstevel@tonic-gate 	ASSERT(!(ire->ire_type & IRE_BROADCAST));
1550Sstevel@tonic-gate 	if (!(ire->ire_type & (IRE_LOCAL|IRE_BROADCAST))) {
1560Sstevel@tonic-gate 		fo_pkt_count = ib_pkt_count;
1570Sstevel@tonic-gate 		ib_pkt_count = 0;
1580Sstevel@tonic-gate 	}
1590Sstevel@tonic-gate 
1600Sstevel@tonic-gate 	mutex_enter(&ire->ire_lock);
1610Sstevel@tonic-gate 	gw_addr_v6 = ire->ire_gateway_addr_v6;
1620Sstevel@tonic-gate 	mutex_exit(&ire->ire_lock);
1630Sstevel@tonic-gate 
1640Sstevel@tonic-gate 	print_len = snprintf((char *)((mblk_t *)mp)->b_wptr, buf_len,
1650Sstevel@tonic-gate 	    MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR "%5d "
1660Sstevel@tonic-gate 	    "%05d %05ld %06ld %08d %03d %06d %09d %09d %06d %08d "
1670Sstevel@tonic-gate 	    "%04d %08d %08d %d/%d/%d %s\n\t%s\n\t%s\n\t%s\n\t%s\n",
1680Sstevel@tonic-gate 	    (void *)ire, (void *)ire->ire_rfq, (void *)ire->ire_stq,
1690Sstevel@tonic-gate 	    (int)ire->ire_zoneid,
1700Sstevel@tonic-gate 	    ire->ire_max_frag, ire->ire_uinfo.iulp_rtt,
1710Sstevel@tonic-gate 	    ire->ire_uinfo.iulp_rtt_sd,
1720Sstevel@tonic-gate 	    ire->ire_uinfo.iulp_ssthresh, ref,
1730Sstevel@tonic-gate 	    ire->ire_uinfo.iulp_rtomax,
1740Sstevel@tonic-gate 	    (ire->ire_uinfo.iulp_tstamp_ok ? 1: 0),
1750Sstevel@tonic-gate 	    (ire->ire_uinfo.iulp_wscale_ok ? 1: 0),
1760Sstevel@tonic-gate 	    (ire->ire_uinfo.iulp_ecn_ok ? 1: 0),
1770Sstevel@tonic-gate 	    (ire->ire_uinfo.iulp_pmtud_ok ? 1: 0),
1780Sstevel@tonic-gate 	    ire->ire_uinfo.iulp_sack,
1790Sstevel@tonic-gate 	    ire->ire_uinfo.iulp_spipe, ire->ire_uinfo.iulp_rpipe,
1800Sstevel@tonic-gate 	    ib_pkt_count, ire->ire_ob_pkt_count, fo_pkt_count,
1810Sstevel@tonic-gate 	    ip_nv_lookup(ire_nv_tbl, (int)ire->ire_type),
1820Sstevel@tonic-gate 	    inet_ntop(AF_INET6, &ire->ire_addr_v6, buf1, sizeof (buf1)),
1830Sstevel@tonic-gate 	    inet_ntop(AF_INET6, &ire->ire_mask_v6, buf2, sizeof (buf2)),
1840Sstevel@tonic-gate 	    inet_ntop(AF_INET6, &ire->ire_src_addr_v6, buf3, sizeof (buf3)),
1850Sstevel@tonic-gate 	    inet_ntop(AF_INET6, &gw_addr_v6, buf4, sizeof (buf4)));
1860Sstevel@tonic-gate 	if (print_len < buf_len) {
1870Sstevel@tonic-gate 		((mblk_t *)mp)->b_wptr += print_len;
1880Sstevel@tonic-gate 	} else {
1890Sstevel@tonic-gate 		((mblk_t *)mp)->b_wptr += buf_len;
1900Sstevel@tonic-gate 	}
1910Sstevel@tonic-gate }
1920Sstevel@tonic-gate 
1930Sstevel@tonic-gate /* ire_walk routine invoked for ip_ire_report_v6 for each IRE. */
1940Sstevel@tonic-gate static void
1950Sstevel@tonic-gate ire_report_ctable_v6(ire_t *ire, char *mp)
1960Sstevel@tonic-gate {
1970Sstevel@tonic-gate 	char	buf1[INET6_ADDRSTRLEN];
1980Sstevel@tonic-gate 	char	buf2[INET6_ADDRSTRLEN];
1990Sstevel@tonic-gate 	char	buf3[INET6_ADDRSTRLEN];
2000Sstevel@tonic-gate 	char	buf4[INET6_ADDRSTRLEN];
2010Sstevel@tonic-gate 	uint_t	fo_pkt_count;
2020Sstevel@tonic-gate 	uint_t	ib_pkt_count;
2030Sstevel@tonic-gate 	int	ref;
2040Sstevel@tonic-gate 	in6_addr_t gw_addr_v6;
2050Sstevel@tonic-gate 	uint_t	print_len, buf_len;
2060Sstevel@tonic-gate 
2070Sstevel@tonic-gate 	if ((ire->ire_type & IRE_CACHETABLE) == 0)
2080Sstevel@tonic-gate 		return;
2090Sstevel@tonic-gate 	buf_len = ((mblk_t *)mp)->b_datap->db_lim - ((mblk_t *)mp)->b_wptr;
2100Sstevel@tonic-gate 	if (buf_len <= 0)
2110Sstevel@tonic-gate 		return;
2120Sstevel@tonic-gate 
2130Sstevel@tonic-gate 	/* Number of active references of this ire */
2140Sstevel@tonic-gate 	ref = ire->ire_refcnt;
2150Sstevel@tonic-gate 	/* "inbound" to a non local address is a forward */
2160Sstevel@tonic-gate 	ib_pkt_count = ire->ire_ib_pkt_count;
2170Sstevel@tonic-gate 	fo_pkt_count = 0;
2180Sstevel@tonic-gate 	ASSERT(!(ire->ire_type & IRE_BROADCAST));
2190Sstevel@tonic-gate 	if (ire->ire_type & IRE_LOCAL) {
2200Sstevel@tonic-gate 		fo_pkt_count = ib_pkt_count;
2210Sstevel@tonic-gate 		ib_pkt_count = 0;
2220Sstevel@tonic-gate 	}
2230Sstevel@tonic-gate 
2240Sstevel@tonic-gate 	mutex_enter(&ire->ire_lock);
2250Sstevel@tonic-gate 	gw_addr_v6 = ire->ire_gateway_addr_v6;
2260Sstevel@tonic-gate 	mutex_exit(&ire->ire_lock);
2270Sstevel@tonic-gate 
2280Sstevel@tonic-gate 	print_len =  snprintf((char *)((mblk_t *)mp)->b_wptr, buf_len,
2290Sstevel@tonic-gate 	    MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR "%5d "
2300Sstevel@tonic-gate 	    "%05d %05ld %06ld %08d %03d %06d %09d %09d %06d %08d "
2310Sstevel@tonic-gate 	    "%04d %08d %08d %d/%d/%d %s\n\t%s\n\t%s\n\t%s\n\t%s\n",
2320Sstevel@tonic-gate 	    (void *)ire, (void *)ire->ire_rfq, (void *)ire->ire_stq,
2330Sstevel@tonic-gate 	    (int)ire->ire_zoneid,
2340Sstevel@tonic-gate 	    ire->ire_max_frag, ire->ire_uinfo.iulp_rtt,
2350Sstevel@tonic-gate 	    ire->ire_uinfo.iulp_rtt_sd, ire->ire_uinfo.iulp_ssthresh, ref,
2360Sstevel@tonic-gate 	    ire->ire_uinfo.iulp_rtomax,
2370Sstevel@tonic-gate 	    (ire->ire_uinfo.iulp_tstamp_ok ? 1: 0),
2380Sstevel@tonic-gate 	    (ire->ire_uinfo.iulp_wscale_ok ? 1: 0),
2390Sstevel@tonic-gate 	    (ire->ire_uinfo.iulp_ecn_ok ? 1: 0),
2400Sstevel@tonic-gate 	    (ire->ire_uinfo.iulp_pmtud_ok ? 1: 0),
2410Sstevel@tonic-gate 	    ire->ire_uinfo.iulp_sack,
2420Sstevel@tonic-gate 	    ire->ire_uinfo.iulp_spipe, ire->ire_uinfo.iulp_rpipe,
2430Sstevel@tonic-gate 	    ib_pkt_count, ire->ire_ob_pkt_count,
2440Sstevel@tonic-gate 	    fo_pkt_count, ip_nv_lookup(ire_nv_tbl, (int)ire->ire_type),
2450Sstevel@tonic-gate 	    inet_ntop(AF_INET6, &ire->ire_addr_v6, buf1, sizeof (buf1)),
2460Sstevel@tonic-gate 	    inet_ntop(AF_INET6, &ire->ire_mask_v6, buf2, sizeof (buf2)),
2470Sstevel@tonic-gate 	    inet_ntop(AF_INET6, &ire->ire_src_addr_v6, buf3, sizeof (buf3)),
2480Sstevel@tonic-gate 	    inet_ntop(AF_INET6, &gw_addr_v6, buf4, sizeof (buf4)));
2490Sstevel@tonic-gate 	if (print_len < buf_len) {
2500Sstevel@tonic-gate 		((mblk_t *)mp)->b_wptr += print_len;
2510Sstevel@tonic-gate 	} else {
2520Sstevel@tonic-gate 		((mblk_t *)mp)->b_wptr += buf_len;
2530Sstevel@tonic-gate 	}
2540Sstevel@tonic-gate }
2550Sstevel@tonic-gate 
2560Sstevel@tonic-gate 
2570Sstevel@tonic-gate /*
2580Sstevel@tonic-gate  * Initialize the ire that is specific to IPv6 part and call
2590Sstevel@tonic-gate  * ire_init_common to finish it.
2600Sstevel@tonic-gate  */
2610Sstevel@tonic-gate ire_t *
2620Sstevel@tonic-gate ire_init_v6(ire_t *ire, const in6_addr_t *v6addr,
2630Sstevel@tonic-gate     const in6_addr_t *v6mask, const in6_addr_t *v6src_addr,
2640Sstevel@tonic-gate     const in6_addr_t *v6gateway, uint_t *max_fragp,
2650Sstevel@tonic-gate     mblk_t *fp_mp, queue_t *rfq, queue_t *stq, ushort_t type,
2660Sstevel@tonic-gate     mblk_t *dlureq_mp, ipif_t *ipif, const in6_addr_t *v6cmask,
2671676Sjpk     uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info,
2683448Sdh155122     tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst)
2690Sstevel@tonic-gate {
2702535Ssangeeta 
2711676Sjpk 	/*
2721676Sjpk 	 * Reject IRE security attribute creation/initialization
2731676Sjpk 	 * if system is not running in Trusted mode.
2741676Sjpk 	 */
2751676Sjpk 	if ((gc != NULL || gcgrp != NULL) && !is_system_labeled())
2761676Sjpk 		return (NULL);
2771676Sjpk 
2780Sstevel@tonic-gate 	if (fp_mp != NULL) {
2790Sstevel@tonic-gate 		/*
2800Sstevel@tonic-gate 		 * We can't dupb() here as multiple threads could be
2810Sstevel@tonic-gate 		 * calling dupb on the same mp which is incorrect.
2820Sstevel@tonic-gate 		 * First dupb() should be called only by one thread.
2830Sstevel@tonic-gate 		 */
2840Sstevel@tonic-gate 		fp_mp = copyb(fp_mp);
2850Sstevel@tonic-gate 		if (fp_mp == NULL)
2860Sstevel@tonic-gate 			return (NULL);
2870Sstevel@tonic-gate 	}
2880Sstevel@tonic-gate 
2890Sstevel@tonic-gate 	if (dlureq_mp != NULL) {
2900Sstevel@tonic-gate 		/*
2910Sstevel@tonic-gate 		 * We can't dupb() here as multiple threads could be
2920Sstevel@tonic-gate 		 * calling dupb on the same mp which is incorrect.
2930Sstevel@tonic-gate 		 * First dupb() should be called only by one thread.
2940Sstevel@tonic-gate 		 */
2950Sstevel@tonic-gate 		dlureq_mp = copyb(dlureq_mp);
2960Sstevel@tonic-gate 		if (dlureq_mp == NULL) {
2970Sstevel@tonic-gate 			if (fp_mp != NULL)
2980Sstevel@tonic-gate 				freeb(fp_mp);
2990Sstevel@tonic-gate 			return (NULL);
3000Sstevel@tonic-gate 		}
3010Sstevel@tonic-gate 	}
3020Sstevel@tonic-gate 
3033448Sdh155122 	BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_alloced);
3040Sstevel@tonic-gate 	ire->ire_addr_v6 = *v6addr;
3050Sstevel@tonic-gate 
3060Sstevel@tonic-gate 	if (v6src_addr != NULL)
3070Sstevel@tonic-gate 		ire->ire_src_addr_v6 = *v6src_addr;
3080Sstevel@tonic-gate 	if (v6mask != NULL) {
3090Sstevel@tonic-gate 		ire->ire_mask_v6 = *v6mask;
3100Sstevel@tonic-gate 		ire->ire_masklen = ip_mask_to_plen_v6(&ire->ire_mask_v6);
3110Sstevel@tonic-gate 	}
3120Sstevel@tonic-gate 	if (v6gateway != NULL)
3130Sstevel@tonic-gate 		ire->ire_gateway_addr_v6 = *v6gateway;
3140Sstevel@tonic-gate 
3150Sstevel@tonic-gate 	if (type == IRE_CACHE && v6cmask != NULL)
3160Sstevel@tonic-gate 		ire->ire_cmask_v6 = *v6cmask;
3170Sstevel@tonic-gate 
3180Sstevel@tonic-gate 	/*
3190Sstevel@tonic-gate 	 * Multirouted packets need to have a fragment header added so that
3200Sstevel@tonic-gate 	 * the receiver is able to discard duplicates according to their
3210Sstevel@tonic-gate 	 * fragment identifier.
3220Sstevel@tonic-gate 	 */
3230Sstevel@tonic-gate 	if (type == IRE_CACHE && (flags & RTF_MULTIRT)) {
3240Sstevel@tonic-gate 		ire->ire_frag_flag = IPH_FRAG_HDR;
3250Sstevel@tonic-gate 	}
3260Sstevel@tonic-gate 
3271676Sjpk 	/* ire_init_common will free the mblks upon encountering any failure */
3281676Sjpk 	if (!ire_init_common(ire, max_fragp, fp_mp, rfq, stq, type, dlureq_mp,
3291676Sjpk 	    ipif, NULL, phandle, ihandle, flags, IPV6_VERSION, ulp_info,
3303448Sdh155122 	    gc, gcgrp, ipst))
3311676Sjpk 		return (NULL);
3320Sstevel@tonic-gate 
3330Sstevel@tonic-gate 	return (ire);
3340Sstevel@tonic-gate }
3350Sstevel@tonic-gate 
3360Sstevel@tonic-gate /*
3370Sstevel@tonic-gate  * Similar to ire_create_v6 except that it is called only when
3380Sstevel@tonic-gate  * we want to allocate ire as an mblk e.g. we have a external
3390Sstevel@tonic-gate  * resolver. Do we need this in IPv6 ?
3400Sstevel@tonic-gate  */
3410Sstevel@tonic-gate ire_t *
3420Sstevel@tonic-gate ire_create_mp_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask,
3430Sstevel@tonic-gate     const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway,
3440Sstevel@tonic-gate     mblk_t *fp_mp, queue_t *rfq, queue_t *stq, ushort_t type,
3450Sstevel@tonic-gate     mblk_t *dlureq_mp, ipif_t *ipif, const in6_addr_t *v6cmask,
3461676Sjpk     uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info,
3473448Sdh155122     tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst)
3480Sstevel@tonic-gate {
3490Sstevel@tonic-gate 	ire_t	*ire;
3500Sstevel@tonic-gate 	ire_t	*ret_ire;
3510Sstevel@tonic-gate 	mblk_t	*mp;
3520Sstevel@tonic-gate 
3530Sstevel@tonic-gate 	ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr));
3540Sstevel@tonic-gate 
3550Sstevel@tonic-gate 	/* Allocate the new IRE. */
3560Sstevel@tonic-gate 	mp = allocb(sizeof (ire_t), BPRI_MED);
3570Sstevel@tonic-gate 	if (mp == NULL) {
3580Sstevel@tonic-gate 		ip1dbg(("ire_create_mp_v6: alloc failed\n"));
3590Sstevel@tonic-gate 		return (NULL);
3600Sstevel@tonic-gate 	}
3610Sstevel@tonic-gate 
3620Sstevel@tonic-gate 	ire = (ire_t *)mp->b_rptr;
3630Sstevel@tonic-gate 	mp->b_wptr = (uchar_t *)&ire[1];
3640Sstevel@tonic-gate 
3650Sstevel@tonic-gate 	/* Start clean. */
3660Sstevel@tonic-gate 	*ire = ire_null;
3670Sstevel@tonic-gate 	ire->ire_mp = mp;
3680Sstevel@tonic-gate 	mp->b_datap->db_type = IRE_DB_TYPE;
3690Sstevel@tonic-gate 
3700Sstevel@tonic-gate 	ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway,
3710Sstevel@tonic-gate 	    NULL, fp_mp, rfq, stq, type, dlureq_mp, ipif, v6cmask, phandle,
3723448Sdh155122 	    ihandle, flags, ulp_info, gc, gcgrp, ipst);
3730Sstevel@tonic-gate 
3740Sstevel@tonic-gate 	if (ret_ire == NULL) {
3750Sstevel@tonic-gate 		freeb(ire->ire_mp);
3760Sstevel@tonic-gate 		return (NULL);
3770Sstevel@tonic-gate 	}
3780Sstevel@tonic-gate 	return (ire);
3790Sstevel@tonic-gate }
3800Sstevel@tonic-gate 
3810Sstevel@tonic-gate /*
3820Sstevel@tonic-gate  * ire_create_v6 is called to allocate and initialize a new IRE.
3830Sstevel@tonic-gate  *
3840Sstevel@tonic-gate  * NOTE : This is called as writer sometimes though not required
3850Sstevel@tonic-gate  * by this function.
3860Sstevel@tonic-gate  */
3870Sstevel@tonic-gate ire_t *
3880Sstevel@tonic-gate ire_create_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask,
3890Sstevel@tonic-gate     const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway,
3900Sstevel@tonic-gate     uint_t *max_fragp, mblk_t *fp_mp, queue_t *rfq, queue_t *stq, ushort_t type,
3910Sstevel@tonic-gate     mblk_t *dlureq_mp, ipif_t *ipif, const in6_addr_t *v6cmask,
3921676Sjpk     uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info,
3933448Sdh155122     tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst)
3940Sstevel@tonic-gate {
3950Sstevel@tonic-gate 	ire_t	*ire;
3960Sstevel@tonic-gate 	ire_t	*ret_ire;
3970Sstevel@tonic-gate 
3980Sstevel@tonic-gate 	ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr));
3990Sstevel@tonic-gate 
4000Sstevel@tonic-gate 	ire = kmem_cache_alloc(ire_cache, KM_NOSLEEP);
4010Sstevel@tonic-gate 	if (ire == NULL) {
4020Sstevel@tonic-gate 		ip1dbg(("ire_create_v6: alloc failed\n"));
4030Sstevel@tonic-gate 		return (NULL);
4040Sstevel@tonic-gate 	}
4050Sstevel@tonic-gate 	*ire = ire_null;
4060Sstevel@tonic-gate 
4070Sstevel@tonic-gate 	ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway,
4080Sstevel@tonic-gate 	    max_fragp, fp_mp, rfq, stq, type, dlureq_mp, ipif, v6cmask, phandle,
4093448Sdh155122 	    ihandle, flags, ulp_info, gc, gcgrp, ipst);
4100Sstevel@tonic-gate 
4110Sstevel@tonic-gate 	if (ret_ire == NULL) {
4120Sstevel@tonic-gate 		kmem_cache_free(ire_cache, ire);
4130Sstevel@tonic-gate 		return (NULL);
4140Sstevel@tonic-gate 	}
4150Sstevel@tonic-gate 	ASSERT(ret_ire == ire);
4160Sstevel@tonic-gate 	return (ire);
4170Sstevel@tonic-gate }
4180Sstevel@tonic-gate 
4190Sstevel@tonic-gate /*
4200Sstevel@tonic-gate  * Find an IRE_INTERFACE for the multicast group.
4210Sstevel@tonic-gate  * Allows different routes for multicast addresses
4220Sstevel@tonic-gate  * in the unicast routing table (akin to FF::0/8 but could be more specific)
4230Sstevel@tonic-gate  * which point at different interfaces. This is used when IPV6_MULTICAST_IF
4240Sstevel@tonic-gate  * isn't specified (when sending) and when IPV6_JOIN_GROUP doesn't
4250Sstevel@tonic-gate  * specify the interface to join on.
4260Sstevel@tonic-gate  *
4270Sstevel@tonic-gate  * Supports link-local addresses by following the ipif/ill when recursing.
4280Sstevel@tonic-gate  */
4290Sstevel@tonic-gate ire_t *
4303448Sdh155122 ire_lookup_multi_v6(const in6_addr_t *group, zoneid_t zoneid, ip_stack_t *ipst)
4310Sstevel@tonic-gate {
4320Sstevel@tonic-gate 	ire_t	*ire;
4330Sstevel@tonic-gate 	ipif_t	*ipif = NULL;
4340Sstevel@tonic-gate 	int	match_flags = MATCH_IRE_TYPE;
4350Sstevel@tonic-gate 	in6_addr_t gw_addr_v6;
4360Sstevel@tonic-gate 
4370Sstevel@tonic-gate 	ire = ire_ftable_lookup_v6(group, 0, 0, 0, NULL, NULL,
4383448Sdh155122 	    zoneid, 0, NULL, MATCH_IRE_DEFAULT, ipst);
4390Sstevel@tonic-gate 
4400Sstevel@tonic-gate 	/* We search a resolvable ire in case of multirouting. */
4410Sstevel@tonic-gate 	if ((ire != NULL) && (ire->ire_flags & RTF_MULTIRT)) {
4420Sstevel@tonic-gate 		ire_t *cire = NULL;
4430Sstevel@tonic-gate 		/*
4440Sstevel@tonic-gate 		 * If the route is not resolvable, the looked up ire
4450Sstevel@tonic-gate 		 * may be changed here. In that case, ire_multirt_lookup()
4460Sstevel@tonic-gate 		 * IRE_REFRELE the original ire and change it.
4470Sstevel@tonic-gate 		 */
4481676Sjpk 		(void) ire_multirt_lookup_v6(&cire, &ire, MULTIRT_CACHEGW,
4493448Sdh155122 		    NULL, ipst);
4500Sstevel@tonic-gate 		if (cire != NULL)
4510Sstevel@tonic-gate 			ire_refrele(cire);
4520Sstevel@tonic-gate 	}
4530Sstevel@tonic-gate 	if (ire == NULL)
4540Sstevel@tonic-gate 		return (NULL);
4550Sstevel@tonic-gate 	/*
4560Sstevel@tonic-gate 	 * Make sure we follow ire_ipif.
4570Sstevel@tonic-gate 	 *
4580Sstevel@tonic-gate 	 * We need to determine the interface route through
4590Sstevel@tonic-gate 	 * which the gateway will be reached. We don't really
4600Sstevel@tonic-gate 	 * care which interface is picked if the interface is
4610Sstevel@tonic-gate 	 * part of a group.
4620Sstevel@tonic-gate 	 */
4630Sstevel@tonic-gate 	if (ire->ire_ipif != NULL) {
4640Sstevel@tonic-gate 		ipif = ire->ire_ipif;
4650Sstevel@tonic-gate 		match_flags |= MATCH_IRE_ILL_GROUP;
4660Sstevel@tonic-gate 	}
4670Sstevel@tonic-gate 
4680Sstevel@tonic-gate 	switch (ire->ire_type) {
4690Sstevel@tonic-gate 	case IRE_DEFAULT:
4700Sstevel@tonic-gate 	case IRE_PREFIX:
4710Sstevel@tonic-gate 	case IRE_HOST:
4720Sstevel@tonic-gate 		mutex_enter(&ire->ire_lock);
4730Sstevel@tonic-gate 		gw_addr_v6 = ire->ire_gateway_addr_v6;
4740Sstevel@tonic-gate 		mutex_exit(&ire->ire_lock);
4750Sstevel@tonic-gate 		ire_refrele(ire);
4760Sstevel@tonic-gate 		ire = ire_ftable_lookup_v6(&gw_addr_v6, 0, 0,
4770Sstevel@tonic-gate 		    IRE_INTERFACE, ipif, NULL, zoneid, 0,
4783448Sdh155122 		    NULL, match_flags, ipst);
4790Sstevel@tonic-gate 		return (ire);
4800Sstevel@tonic-gate 	case IRE_IF_NORESOLVER:
4810Sstevel@tonic-gate 	case IRE_IF_RESOLVER:
4820Sstevel@tonic-gate 		return (ire);
4830Sstevel@tonic-gate 	default:
4840Sstevel@tonic-gate 		ire_refrele(ire);
4850Sstevel@tonic-gate 		return (NULL);
4860Sstevel@tonic-gate 	}
4870Sstevel@tonic-gate }
4880Sstevel@tonic-gate 
4890Sstevel@tonic-gate /*
4900Sstevel@tonic-gate  * Return any local address.  We use this to target ourselves
4910Sstevel@tonic-gate  * when the src address was specified as 'default'.
4920Sstevel@tonic-gate  * Preference for IRE_LOCAL entries.
4930Sstevel@tonic-gate  */
4940Sstevel@tonic-gate ire_t *
4953448Sdh155122 ire_lookup_local_v6(zoneid_t zoneid, ip_stack_t *ipst)
4960Sstevel@tonic-gate {
4970Sstevel@tonic-gate 	ire_t	*ire;
4980Sstevel@tonic-gate 	irb_t	*irb;
4990Sstevel@tonic-gate 	ire_t	*maybe = NULL;
5000Sstevel@tonic-gate 	int i;
5010Sstevel@tonic-gate 
5023448Sdh155122 	for (i = 0; i < ipst->ips_ip6_cache_table_size;  i++) {
5033448Sdh155122 		irb = &ipst->ips_ip_cache_table_v6[i];
5040Sstevel@tonic-gate 		if (irb->irb_ire == NULL)
5050Sstevel@tonic-gate 			continue;
5060Sstevel@tonic-gate 		rw_enter(&irb->irb_lock, RW_READER);
5070Sstevel@tonic-gate 		for (ire = irb->irb_ire; ire; ire = ire->ire_next) {
5080Sstevel@tonic-gate 			if ((ire->ire_marks & IRE_MARK_CONDEMNED) ||
5091676Sjpk 			    ire->ire_zoneid != zoneid &&
5101676Sjpk 			    ire->ire_zoneid != ALL_ZONES)
5110Sstevel@tonic-gate 				continue;
5120Sstevel@tonic-gate 			switch (ire->ire_type) {
5130Sstevel@tonic-gate 			case IRE_LOOPBACK:
5140Sstevel@tonic-gate 				if (maybe == NULL) {
5150Sstevel@tonic-gate 					IRE_REFHOLD(ire);
5160Sstevel@tonic-gate 					maybe = ire;
5170Sstevel@tonic-gate 				}
5180Sstevel@tonic-gate 				break;
5190Sstevel@tonic-gate 			case IRE_LOCAL:
5200Sstevel@tonic-gate 				if (maybe != NULL) {
5210Sstevel@tonic-gate 					ire_refrele(maybe);
5220Sstevel@tonic-gate 				}
5230Sstevel@tonic-gate 				IRE_REFHOLD(ire);
5240Sstevel@tonic-gate 				rw_exit(&irb->irb_lock);
5250Sstevel@tonic-gate 				return (ire);
5260Sstevel@tonic-gate 			}
5270Sstevel@tonic-gate 		}
5280Sstevel@tonic-gate 		rw_exit(&irb->irb_lock);
5290Sstevel@tonic-gate 	}
5300Sstevel@tonic-gate 	return (maybe);
5310Sstevel@tonic-gate }
5320Sstevel@tonic-gate 
5330Sstevel@tonic-gate /*
5340Sstevel@tonic-gate  * This function takes a mask and returns number of bits set in the
5350Sstevel@tonic-gate  * mask (the represented prefix length).  Assumes a contiguous mask.
5360Sstevel@tonic-gate  */
5370Sstevel@tonic-gate int
5380Sstevel@tonic-gate ip_mask_to_plen_v6(const in6_addr_t *v6mask)
5390Sstevel@tonic-gate {
5400Sstevel@tonic-gate 	int		bits;
5410Sstevel@tonic-gate 	int		plen = IPV6_ABITS;
5420Sstevel@tonic-gate 	int		i;
5430Sstevel@tonic-gate 
5440Sstevel@tonic-gate 	for (i = 3; i >= 0; i--) {
5450Sstevel@tonic-gate 		if (v6mask->s6_addr32[i] == 0) {
5460Sstevel@tonic-gate 			plen -= 32;
5470Sstevel@tonic-gate 			continue;
5480Sstevel@tonic-gate 		}
5490Sstevel@tonic-gate 		bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1;
5500Sstevel@tonic-gate 		if (bits == 0)
5510Sstevel@tonic-gate 			break;
5520Sstevel@tonic-gate 		plen -= bits;
5530Sstevel@tonic-gate 	}
5540Sstevel@tonic-gate 
5550Sstevel@tonic-gate 	return (plen);
5560Sstevel@tonic-gate }
5570Sstevel@tonic-gate 
5580Sstevel@tonic-gate /*
5590Sstevel@tonic-gate  * Convert a prefix length to the mask for that prefix.
5600Sstevel@tonic-gate  * Returns the argument bitmask.
5610Sstevel@tonic-gate  */
5620Sstevel@tonic-gate in6_addr_t *
5630Sstevel@tonic-gate ip_plen_to_mask_v6(uint_t plen, in6_addr_t *bitmask)
5640Sstevel@tonic-gate {
5650Sstevel@tonic-gate 	uint32_t *ptr;
5660Sstevel@tonic-gate 
5670Sstevel@tonic-gate 	if (plen < 0 || plen > IPV6_ABITS)
5680Sstevel@tonic-gate 		return (NULL);
5690Sstevel@tonic-gate 	*bitmask = ipv6_all_zeros;
5700Sstevel@tonic-gate 
5710Sstevel@tonic-gate 	ptr = (uint32_t *)bitmask;
5720Sstevel@tonic-gate 	while (plen > 32) {
5730Sstevel@tonic-gate 		*ptr++ = 0xffffffffU;
5740Sstevel@tonic-gate 		plen -= 32;
5750Sstevel@tonic-gate 	}
5760Sstevel@tonic-gate 	*ptr = htonl(0xffffffffU << (32 - plen));
5770Sstevel@tonic-gate 	return (bitmask);
5780Sstevel@tonic-gate }
5790Sstevel@tonic-gate 
/*
 * Add a fully initialized IRE to an appropriate
 * table based on ire_type.
 *
 * The forward table contains IRE_PREFIX/IRE_HOST and
 * IRE_IF_RESOLVER/IRE_IF_NORESOLVER and IRE_DEFAULT.
 *
 * The cache table contains IRE_BROADCAST/IRE_LOCAL/IRE_LOOPBACK
 * and IRE_CACHE.
 *
 * NOTE : This function is called as writer though not required
 * by this function.
 */
/*
 * Contract summary, as implemented below:
 *
 *  - *ire_p is the IRE to insert. q, mp and func are only handed on to
 *    ire_atomic_start().
 *  - On success 0 is returned and *ire_p points at a held IRE: either the
 *    IRE that was passed in (now linked into its bucket) or an already
 *    existing duplicate, in which case the passed-in IRE has been handed
 *    to ire_delete().
 *  - On failure the passed-in IRE is handed to ire_delete(), *ire_p is set
 *    to NULL and an error is returned: EINVAL (unrecognized type, interface
 *    route missing/condemned for the xresolv case, no usable nce), ENOMEM
 *    (forwarding table allocation failed) or whatever ire_atomic_start()
 *    returned.
 */
5930Sstevel@tonic-gate int
5940Sstevel@tonic-gate ire_add_v6(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func)
5950Sstevel@tonic-gate {
5960Sstevel@tonic-gate 	ire_t	*ire1;
5970Sstevel@tonic-gate 	int	mask_table_index;
5980Sstevel@tonic-gate 	irb_t	*irb_ptr;
5990Sstevel@tonic-gate 	ire_t	**irep;
6000Sstevel@tonic-gate 	int	flags;
6010Sstevel@tonic-gate 	ire_t	*pire = NULL;
6020Sstevel@tonic-gate 	ill_t	*stq_ill;
6030Sstevel@tonic-gate 	boolean_t	ndp_g_lock_held = B_FALSE;
6040Sstevel@tonic-gate 	ire_t	*ire = *ire_p;
6050Sstevel@tonic-gate 	int	error;
6063448Sdh155122 	ip_stack_t	*ipst = ire->ire_ipst;
6070Sstevel@tonic-gate 
6080Sstevel@tonic-gate 	ASSERT(ire->ire_ipversion == IPV6_VERSION);
6090Sstevel@tonic-gate 	ASSERT(ire->ire_mp == NULL); /* Calls should go through ire_add */
6100Sstevel@tonic-gate 	ASSERT(ire->ire_nce == NULL);
6110Sstevel@tonic-gate 
6120Sstevel@tonic-gate 	/*
	 * Normalize the mask/source address according to the IRE type.
	 * Unrecognized types are rejected with EINVAL.
	 */
6130Sstevel@tonic-gate 	switch (ire->ire_type) {
6140Sstevel@tonic-gate 	case IRE_HOST:
6150Sstevel@tonic-gate 		ire->ire_mask_v6 = ipv6_all_ones;
6160Sstevel@tonic-gate 		ire->ire_masklen = IPV6_ABITS;
6170Sstevel@tonic-gate 		if ((ire->ire_flags & RTF_SETSRC) == 0)
6180Sstevel@tonic-gate 			ire->ire_src_addr_v6 = ipv6_all_zeros;
6190Sstevel@tonic-gate 		break;
6200Sstevel@tonic-gate 	case IRE_CACHE:
6210Sstevel@tonic-gate 	case IRE_LOCAL:
6220Sstevel@tonic-gate 	case IRE_LOOPBACK:
6230Sstevel@tonic-gate 		ire->ire_mask_v6 = ipv6_all_ones;
6240Sstevel@tonic-gate 		ire->ire_masklen = IPV6_ABITS;
6250Sstevel@tonic-gate 		break;
6260Sstevel@tonic-gate 	case IRE_PREFIX:
6270Sstevel@tonic-gate 		if ((ire->ire_flags & RTF_SETSRC) == 0)
6280Sstevel@tonic-gate 			ire->ire_src_addr_v6 = ipv6_all_zeros;
6290Sstevel@tonic-gate 		break;
6300Sstevel@tonic-gate 	case IRE_DEFAULT:
6310Sstevel@tonic-gate 		if ((ire->ire_flags & RTF_SETSRC) == 0)
6320Sstevel@tonic-gate 			ire->ire_src_addr_v6 = ipv6_all_zeros;
6330Sstevel@tonic-gate 		break;
6340Sstevel@tonic-gate 	case IRE_IF_RESOLVER:
6350Sstevel@tonic-gate 	case IRE_IF_NORESOLVER:
6360Sstevel@tonic-gate 		break;
6370Sstevel@tonic-gate 	default:
6380Sstevel@tonic-gate 		printf("ire_add_v6: ire %p has unrecognized IRE type (%d)\n",
6390Sstevel@tonic-gate 		    (void *)ire, ire->ire_type);
6400Sstevel@tonic-gate 		ire_delete(ire);
6410Sstevel@tonic-gate 		*ire_p = NULL;
6420Sstevel@tonic-gate 		return (EINVAL);
6430Sstevel@tonic-gate 	}
6440Sstevel@tonic-gate 
6450Sstevel@tonic-gate 	/* Make sure the address is properly masked. */
6460Sstevel@tonic-gate 	V6_MASK_COPY(ire->ire_addr_v6, ire->ire_mask_v6, ire->ire_addr_v6);
6470Sstevel@tonic-gate 
6480Sstevel@tonic-gate 	if ((ire->ire_type & IRE_CACHETABLE) == 0) {
6490Sstevel@tonic-gate 		/* IRE goes into Forward Table */
6500Sstevel@tonic-gate 		mask_table_index = ip_mask_to_plen_v6(&ire->ire_mask_v6);
6513448Sdh155122 		if ((ipst->ips_ip_forwarding_table_v6[mask_table_index]) ==
6523448Sdh155122 		    NULL) {
6530Sstevel@tonic-gate 			irb_t *ptr;
6540Sstevel@tonic-gate 			int i;
6550Sstevel@tonic-gate 
			/*
			 * The hash bucket array for this prefix length does
			 * not exist yet. Allocate and initialize it without
			 * holding ips_ire_ft_init_lock, then install it under
			 * the lock only if the slot is still empty.
			 */
6563448Sdh155122 			ptr = (irb_t *)mi_zalloc((
6573448Sdh155122 			    ipst->ips_ip6_ftable_hash_size * sizeof (irb_t)));
6580Sstevel@tonic-gate 			if (ptr == NULL) {
6590Sstevel@tonic-gate 				ire_delete(ire);
6600Sstevel@tonic-gate 				*ire_p = NULL;
6610Sstevel@tonic-gate 				return (ENOMEM);
6620Sstevel@tonic-gate 			}
6633448Sdh155122 			for (i = 0; i < ipst->ips_ip6_ftable_hash_size; i++) {
6640Sstevel@tonic-gate 				rw_init(&ptr[i].irb_lock, NULL,
6650Sstevel@tonic-gate 				    RW_DEFAULT, NULL);
6660Sstevel@tonic-gate 			}
6673448Sdh155122 			mutex_enter(&ipst->ips_ire_ft_init_lock);
6683448Sdh155122 			if (ipst->ips_ip_forwarding_table_v6[
6693448Sdh155122 			    mask_table_index] == NULL) {
6703448Sdh155122 				ipst->ips_ip_forwarding_table_v6[
6713448Sdh155122 				    mask_table_index] = ptr;
6723448Sdh155122 				mutex_exit(&ipst->ips_ire_ft_init_lock);
6730Sstevel@tonic-gate 			} else {
6740Sstevel@tonic-gate 				/*
6750Sstevel@tonic-gate 				 * Some other thread won the race in
6760Sstevel@tonic-gate 				 * initializing the forwarding table at the
6770Sstevel@tonic-gate 				 * same index. Discard our copy.
6780Sstevel@tonic-gate 				 */
6793448Sdh155122 				mutex_exit(&ipst->ips_ire_ft_init_lock);
6803448Sdh155122 				for (i = 0; i < ipst->ips_ip6_ftable_hash_size;
6813448Sdh155122 				    i++) {
6820Sstevel@tonic-gate 					rw_destroy(&ptr[i].irb_lock);
6830Sstevel@tonic-gate 				}
6840Sstevel@tonic-gate 				mi_free(ptr);
6850Sstevel@tonic-gate 			}
6860Sstevel@tonic-gate 		}
6873448Sdh155122 		irb_ptr = &(ipst->ips_ip_forwarding_table_v6[mask_table_index][
6880Sstevel@tonic-gate 		    IRE_ADDR_MASK_HASH_V6(ire->ire_addr_v6, ire->ire_mask_v6,
6893448Sdh155122 		    ipst->ips_ip6_ftable_hash_size)]);
6900Sstevel@tonic-gate 	} else {
		/* IRE goes into the cache table, hashed on its address. */
6913448Sdh155122 		irb_ptr = &(ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(
6923448Sdh155122 		    ire->ire_addr_v6, ipst->ips_ip6_cache_table_size)]);
6930Sstevel@tonic-gate 	}
6940Sstevel@tonic-gate 	/*
6950Sstevel@tonic-gate 	 * For xresolv interfaces (v6 interfaces with an external
6960Sstevel@tonic-gate 	 * address resolver), ip_newroute_v6/ip_newroute_ipif_v6
6970Sstevel@tonic-gate 	 * are unable to prevent the deletion of the interface route
6980Sstevel@tonic-gate 	 * while adding an IRE_CACHE for an on-link destination
6990Sstevel@tonic-gate 	 * in the IRE_IF_RESOLVER case, since the ire has to go to
7000Sstevel@tonic-gate 	 * the external resolver and return. We can't do a REFHOLD on the
7010Sstevel@tonic-gate 	 * associated interface ire for fear of the message being freed
7020Sstevel@tonic-gate 	 * if the external resolver can't resolve the address.
7030Sstevel@tonic-gate 	 * Here we look up the interface ire in the forwarding table
7040Sstevel@tonic-gate 	 * and make sure that the interface route has not been deleted.
7050Sstevel@tonic-gate 	 */
7060Sstevel@tonic-gate 	if (ire->ire_type == IRE_CACHE &&
7070Sstevel@tonic-gate 	    IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6) &&
7080Sstevel@tonic-gate 	    (((ill_t *)ire->ire_stq->q_ptr)->ill_net_type == IRE_IF_RESOLVER) &&
7090Sstevel@tonic-gate 	    (((ill_t *)ire->ire_stq->q_ptr)->ill_flags & ILLF_XRESOLV)) {
7100Sstevel@tonic-gate 
7110Sstevel@tonic-gate 		pire = ire_ihandle_lookup_onlink_v6(ire);
7120Sstevel@tonic-gate 		if (pire == NULL) {
7130Sstevel@tonic-gate 			ire_delete(ire);
7140Sstevel@tonic-gate 			*ire_p = NULL;
7150Sstevel@tonic-gate 			return (EINVAL);
7160Sstevel@tonic-gate 		}
7170Sstevel@tonic-gate 		/* Prevent pire from getting deleted */
7180Sstevel@tonic-gate 		IRB_REFHOLD(pire->ire_bucket);
7190Sstevel@tonic-gate 		/* Has it been removed already? */
7200Sstevel@tonic-gate 		if (pire->ire_marks & IRE_MARK_CONDEMNED) {
7210Sstevel@tonic-gate 			IRB_REFRELE(pire->ire_bucket);
7220Sstevel@tonic-gate 			ire_refrele(pire);
7230Sstevel@tonic-gate 			ire_delete(ire);
7240Sstevel@tonic-gate 			*ire_p = NULL;
7250Sstevel@tonic-gate 			return (EINVAL);
7260Sstevel@tonic-gate 		}
7270Sstevel@tonic-gate 	}
7280Sstevel@tonic-gate 
7290Sstevel@tonic-gate 	flags = (MATCH_IRE_MASK | MATCH_IRE_TYPE | MATCH_IRE_GW);
7300Sstevel@tonic-gate 	/*
7310Sstevel@tonic-gate 	 * For IRE_CACHES, MATCH_IRE_IPIF is not enough to check
7320Sstevel@tonic-gate 	 * for duplicates because :
7330Sstevel@tonic-gate 	 *
7340Sstevel@tonic-gate 	 * 1) ire_ipif->ipif_ill and ire_stq->q_ptr could be
7350Sstevel@tonic-gate 	 *    pointing at different ills. A real duplicate is
7360Sstevel@tonic-gate 	 *    a match on both ire_ipif and ire_stq.
7370Sstevel@tonic-gate 	 *
7380Sstevel@tonic-gate 	 * 2) We could have multiple packets trying to create
7390Sstevel@tonic-gate 	 *    an IRE_CACHE for the same ill.
7400Sstevel@tonic-gate 	 *
7410Sstevel@tonic-gate 	 * Moreover, IPIF_NOFAILOVER and IPV6_BOUND_PIF endpoints want
7420Sstevel@tonic-gate 	 * to go out on a particular ill. Rather than looking at the
7430Sstevel@tonic-gate 	 * packet, we depend on the above for MATCH_IRE_ILL here.
7440Sstevel@tonic-gate 	 *
7450Sstevel@tonic-gate 	 * Unlike IPv4, MATCH_IRE_IPIF is needed here as we could have
7460Sstevel@tonic-gate 	 * multiple IRE_CACHES for an ill for the same destination
7470Sstevel@tonic-gate 	 * with various scoped addresses i.e represented by ipifs.
7480Sstevel@tonic-gate 	 *
7490Sstevel@tonic-gate 	 * MATCH_IRE_ILL is done implicitly below for IRE_CACHES.
7500Sstevel@tonic-gate 	 */
7510Sstevel@tonic-gate 	if (ire->ire_ipif != NULL)
7520Sstevel@tonic-gate 		flags |= MATCH_IRE_IPIF;
7530Sstevel@tonic-gate 	/*
7540Sstevel@tonic-gate 	 * If we are creating hidden ires, make sure we search on
7550Sstevel@tonic-gate 	 * this ill (MATCH_IRE_ILL) and a hidden ire, while we are
7560Sstevel@tonic-gate 	 * searching for duplicates below. Otherwise we could
7570Sstevel@tonic-gate 	 * potentially find an IRE on some other interface
7580Sstevel@tonic-gate 	 * and it may not be a IRE marked with IRE_MARK_HIDDEN. We
7590Sstevel@tonic-gate 	 * shouldn't do this as this will lead to an infinite loop as
7600Sstevel@tonic-gate 	 * eventually we need a hidden ire for this packet to go
7610Sstevel@tonic-gate 	 * out. MATCH_IRE_ILL is already marked above.
7620Sstevel@tonic-gate 	 */
7630Sstevel@tonic-gate 	if (ire->ire_marks & IRE_MARK_HIDDEN) {
7640Sstevel@tonic-gate 		ASSERT(ire->ire_type == IRE_CACHE);
7650Sstevel@tonic-gate 		flags |= MATCH_IRE_MARK_HIDDEN;
7660Sstevel@tonic-gate 	}
7670Sstevel@tonic-gate 
7680Sstevel@tonic-gate 	/*
7690Sstevel@tonic-gate 	 * Start the atomic add of the ire. Grab the ill locks,
7700Sstevel@tonic-gate 	 * ill_g_usesrc_lock and the bucket lock. Check for condemned.
7712535Ssangeeta 	 * To avoid lock order problems, get the ndp6.ndp_g_lock now itself.
7720Sstevel@tonic-gate 	 */
7730Sstevel@tonic-gate 	if (ire->ire_type == IRE_CACHE) {
7743448Sdh155122 		mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
7750Sstevel@tonic-gate 		ndp_g_lock_held = B_TRUE;
7760Sstevel@tonic-gate 	}
7770Sstevel@tonic-gate 
7780Sstevel@tonic-gate 	/*
7790Sstevel@tonic-gate 	 * If ipif or ill is changing ire_atomic_start() may queue the
7800Sstevel@tonic-gate 	 * request and return EINPROGRESS.
7810Sstevel@tonic-gate 	 */
7820Sstevel@tonic-gate 
7830Sstevel@tonic-gate 	error = ire_atomic_start(irb_ptr, ire, q, mp, func);
7840Sstevel@tonic-gate 	if (error != 0) {
7850Sstevel@tonic-gate 		if (ndp_g_lock_held)
7863448Sdh155122 			mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
7870Sstevel@tonic-gate 		/*
7880Sstevel@tonic-gate 		 * We don't know whether it is a valid ipif or not.
7890Sstevel@tonic-gate 		 * So, set it to NULL. This assumes that the ire has not added
7900Sstevel@tonic-gate 		 * a reference to the ipif.
7910Sstevel@tonic-gate 		 */
7920Sstevel@tonic-gate 		ire->ire_ipif = NULL;
7930Sstevel@tonic-gate 		ire_delete(ire);
7940Sstevel@tonic-gate 		if (pire != NULL) {
7950Sstevel@tonic-gate 			IRB_REFRELE(pire->ire_bucket);
7960Sstevel@tonic-gate 			ire_refrele(pire);
7970Sstevel@tonic-gate 		}
7980Sstevel@tonic-gate 		*ire_p = NULL;
7990Sstevel@tonic-gate 		return (error);
8000Sstevel@tonic-gate 	}
8010Sstevel@tonic-gate 	/*
8020Sstevel@tonic-gate 	 * To avoid creating ires having stale values for the ire_max_frag
8030Sstevel@tonic-gate 	 * we get the latest value atomically here. For more details
8040Sstevel@tonic-gate 	 * see the block comment in ip_sioctl_mtu and in DL_NOTE_SDU_CHANGE
8050Sstevel@tonic-gate 	 * in ip_rput_dlpi_writer
	 *
	 * NOTE(review): the non-multicast branch below dereferences pire,
	 * which this function only sets in the xresolv IRE_CACHE case above.
	 * Presumably callers leave ire_max_fragp NULL only in that case (or
	 * for multicast) - confirm against the callers.
8060Sstevel@tonic-gate 	 */
8070Sstevel@tonic-gate 	if (ire->ire_max_fragp == NULL) {
8080Sstevel@tonic-gate 		if (IN6_IS_ADDR_MULTICAST(&ire->ire_addr_v6))
8090Sstevel@tonic-gate 			ire->ire_max_frag = ire->ire_ipif->ipif_mtu;
8100Sstevel@tonic-gate 		else
8110Sstevel@tonic-gate 			ire->ire_max_frag = pire->ire_max_frag;
8120Sstevel@tonic-gate 	} else {
8130Sstevel@tonic-gate 		uint_t  max_frag;
8140Sstevel@tonic-gate 
8150Sstevel@tonic-gate 		max_frag = *ire->ire_max_fragp;
8160Sstevel@tonic-gate 		ire->ire_max_fragp = NULL;
8170Sstevel@tonic-gate 		ire->ire_max_frag = max_frag;
8180Sstevel@tonic-gate 	}
8190Sstevel@tonic-gate 
8200Sstevel@tonic-gate 	/*
8210Sstevel@tonic-gate 	 * Atomically check for duplicate and insert in the table.
8220Sstevel@tonic-gate 	 */
8230Sstevel@tonic-gate 	for (ire1 = irb_ptr->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) {
8240Sstevel@tonic-gate 		if (ire1->ire_marks & IRE_MARK_CONDEMNED)
8250Sstevel@tonic-gate 			continue;
8260Sstevel@tonic-gate 
8270Sstevel@tonic-gate 		if (ire->ire_type == IRE_CACHE) {
8280Sstevel@tonic-gate 			/*
8290Sstevel@tonic-gate 			 * We do MATCH_IRE_ILL implicitly here for IRE_CACHES.
8300Sstevel@tonic-gate 			 * As ire_ipif and ire_stq could point to two
8310Sstevel@tonic-gate 			 * different ills, we can't pass just ire_ipif to
8320Sstevel@tonic-gate 			 * ire_match_args and get a match on both ills.
8330Sstevel@tonic-gate 			 * This is just needed for duplicate checks here and
8340Sstevel@tonic-gate 			 * so we don't add an extra argument to
8350Sstevel@tonic-gate 			 * ire_match_args for this. Do it locally.
8360Sstevel@tonic-gate 			 *
8370Sstevel@tonic-gate 			 * NOTE : Currently there is no part of the code
8380Sstevel@tonic-gate 			 * that asks for both MATCH_IRE_IPIF and MATCH_IRE_ILL
8390Sstevel@tonic-gate 			 * match for IRE_CACHEs. Thus we don't want to
8400Sstevel@tonic-gate 			 * extend the arguments to ire_match_args_v6.
8410Sstevel@tonic-gate 			 */
8420Sstevel@tonic-gate 			if (ire1->ire_stq != ire->ire_stq)
8430Sstevel@tonic-gate 				continue;
8440Sstevel@tonic-gate 			/*
8450Sstevel@tonic-gate 			 * Multiroute IRE_CACHEs for a given destination can
8460Sstevel@tonic-gate 			 * have the same ire_ipif, typically if their source
8470Sstevel@tonic-gate 			 * address is forced using RTF_SETSRC, and the same
8480Sstevel@tonic-gate 			 * send-to queue. We differentiate them using the parent
8490Sstevel@tonic-gate 			 * handle.
8500Sstevel@tonic-gate 			 */
8510Sstevel@tonic-gate 			if ((ire1->ire_flags & RTF_MULTIRT) &&
8520Sstevel@tonic-gate 			    (ire->ire_flags & RTF_MULTIRT) &&
8530Sstevel@tonic-gate 			    (ire1->ire_phandle != ire->ire_phandle))
8540Sstevel@tonic-gate 				continue;
8550Sstevel@tonic-gate 		}
8560Sstevel@tonic-gate 		if (ire1->ire_zoneid != ire->ire_zoneid)
8570Sstevel@tonic-gate 			continue;
8580Sstevel@tonic-gate 		if (ire_match_args_v6(ire1, &ire->ire_addr_v6,
8590Sstevel@tonic-gate 		    &ire->ire_mask_v6, &ire->ire_gateway_addr_v6,
8601676Sjpk 		    ire->ire_type, ire->ire_ipif, ire->ire_zoneid, 0, NULL,
8611676Sjpk 		    flags)) {
8620Sstevel@tonic-gate 			/*
8630Sstevel@tonic-gate 			 * Return the old ire after doing a REFHOLD.
8640Sstevel@tonic-gate 			 * As most of the callers continue to use the IRE
8650Sstevel@tonic-gate 			 * after adding, we return a held ire. This will
8660Sstevel@tonic-gate 			 * avoid a lookup in the caller again. If the callers
8670Sstevel@tonic-gate 			 * don't want to use it, they need to do a REFRELE.
8680Sstevel@tonic-gate 			 */
8690Sstevel@tonic-gate 			ip1dbg(("found dup ire existing %p new %p",
8700Sstevel@tonic-gate 			    (void *)ire1, (void *)ire));
8710Sstevel@tonic-gate 			IRE_REFHOLD(ire1);
8720Sstevel@tonic-gate 			if (ndp_g_lock_held)
8733448Sdh155122 				mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
8740Sstevel@tonic-gate 			ire_atomic_end(irb_ptr, ire);
8750Sstevel@tonic-gate 			ire_delete(ire);
8760Sstevel@tonic-gate 			if (pire != NULL) {
8770Sstevel@tonic-gate 				/*
8780Sstevel@tonic-gate 				 * Assert that it is
8790Sstevel@tonic-gate 				 * not yet removed from the list.
8800Sstevel@tonic-gate 				 */
8810Sstevel@tonic-gate 				ASSERT(pire->ire_ptpn != NULL);
8820Sstevel@tonic-gate 				IRB_REFRELE(pire->ire_bucket);
8830Sstevel@tonic-gate 				ire_refrele(pire);
8840Sstevel@tonic-gate 			}
8850Sstevel@tonic-gate 			*ire_p = ire1;
8860Sstevel@tonic-gate 			return (0);
8870Sstevel@tonic-gate 		}
8880Sstevel@tonic-gate 	}
8890Sstevel@tonic-gate 	if (ire->ire_type == IRE_CACHE) {
8900Sstevel@tonic-gate 		in6_addr_t gw_addr_v6;
8910Sstevel@tonic-gate 		ill_t	*ill = ire_to_ill(ire);
8920Sstevel@tonic-gate 		char	buf[INET6_ADDRSTRLEN];
8930Sstevel@tonic-gate 		nce_t	*nce;
8940Sstevel@tonic-gate 
8950Sstevel@tonic-gate 		/*
8960Sstevel@tonic-gate 		 * All IRE_CACHE types must have a nce.  If this is
8970Sstevel@tonic-gate 		 * not the case the entry will not be added. We need
8980Sstevel@tonic-gate 		 * to make sure that if somebody deletes the nce
8990Sstevel@tonic-gate 		 * after we looked up, they will find this ire and
9000Sstevel@tonic-gate 		 * delete the ire. To delete this ire one needs the
9010Sstevel@tonic-gate 		 * bucket lock which we are still holding here. So,
9020Sstevel@tonic-gate 		 * even if the nce gets deleted after we looked up,
9030Sstevel@tonic-gate 		 * this ire will get deleted.
9040Sstevel@tonic-gate 		 *
9050Sstevel@tonic-gate 		 * NOTE : Don't need the ire_lock for accessing
9060Sstevel@tonic-gate 		 * ire_gateway_addr_v6 as it is appearing first
9070Sstevel@tonic-gate 		 * time on the list and rts_setgwr_v6 could not
9080Sstevel@tonic-gate 		 * be changing this.
9090Sstevel@tonic-gate 		 */
9100Sstevel@tonic-gate 		gw_addr_v6 = ire->ire_gateway_addr_v6;
9110Sstevel@tonic-gate 		if (IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) {
9122535Ssangeeta 			nce = ndp_lookup_v6(ill, &ire->ire_addr_v6, B_TRUE);
9130Sstevel@tonic-gate 		} else {
9142535Ssangeeta 			nce = ndp_lookup_v6(ill, &gw_addr_v6, B_TRUE);
9150Sstevel@tonic-gate 		}
9160Sstevel@tonic-gate 		if (nce == NULL)
9170Sstevel@tonic-gate 			goto failed;
9180Sstevel@tonic-gate 
9190Sstevel@tonic-gate 		/* Pair of refhold, refrele just to get the tracing right */
9202535Ssangeeta 		NCE_REFHOLD_TO_REFHOLD_NOTR(nce);
9210Sstevel@tonic-gate 		/*
9220Sstevel@tonic-gate 		 * Atomically make sure that new IREs don't point
9230Sstevel@tonic-gate 		 * to an NCE that is logically deleted (CONDEMNED).
9240Sstevel@tonic-gate 		 * ndp_delete() first marks the NCE CONDEMNED.
9250Sstevel@tonic-gate 		 * This ensures that the nce_refcnt won't increase
9260Sstevel@tonic-gate 		 * due to new nce_lookups or due to addition of new IREs
9270Sstevel@tonic-gate 		 * pointing to this NCE. Then ndp_delete() cleans up
9280Sstevel@tonic-gate 		 * existing references. If we don't do it atomically here,
9290Sstevel@tonic-gate 		 * ndp_delete() -> nce_ire_delete() will not be able to
9300Sstevel@tonic-gate 		 * clean up the IRE list completely, and the nce_refcnt
9310Sstevel@tonic-gate 		 * won't go down to zero.
9320Sstevel@tonic-gate 		 */
9330Sstevel@tonic-gate 		mutex_enter(&nce->nce_lock);
9340Sstevel@tonic-gate 		if (ill->ill_flags & ILLF_XRESOLV) {
9350Sstevel@tonic-gate 			/*
9360Sstevel@tonic-gate 			 * If we used an external resolver, we may not
9370Sstevel@tonic-gate 			 * have gone through neighbor discovery to get here.
9380Sstevel@tonic-gate 			 * Must update the nce_state before the next check.
9390Sstevel@tonic-gate 			 */
9400Sstevel@tonic-gate 			if (nce->nce_state == ND_INCOMPLETE)
9410Sstevel@tonic-gate 				nce->nce_state = ND_REACHABLE;
9420Sstevel@tonic-gate 		}
9430Sstevel@tonic-gate 		if (nce->nce_state == ND_INCOMPLETE ||
9440Sstevel@tonic-gate 		    (nce->nce_flags & NCE_F_CONDEMNED) ||
9450Sstevel@tonic-gate 		    (nce->nce_state == ND_UNREACHABLE)) {
			/*
			 * This label is also entered by the goto above with
			 * nce == NULL (lookup failed, nce_lock not held),
			 * hence the NULL checks on nce in the cleanup below.
			 */
9460Sstevel@tonic-gate failed:
9470Sstevel@tonic-gate 			if (ndp_g_lock_held)
9483448Sdh155122 				mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
9490Sstevel@tonic-gate 			if (nce != NULL)
9500Sstevel@tonic-gate 				mutex_exit(&nce->nce_lock);
9510Sstevel@tonic-gate 			ire_atomic_end(irb_ptr, ire);
9520Sstevel@tonic-gate 			ip1dbg(("ire_add_v6: No nce for dst %s \n",
9530Sstevel@tonic-gate 			    inet_ntop(AF_INET6, &ire->ire_addr_v6,
9540Sstevel@tonic-gate 			    buf, sizeof (buf))));
9550Sstevel@tonic-gate 			ire_delete(ire);
9560Sstevel@tonic-gate 			if (pire != NULL) {
9570Sstevel@tonic-gate 				/*
9580Sstevel@tonic-gate 				 * Assert that it is
9590Sstevel@tonic-gate 				 * not yet removed from the list.
9600Sstevel@tonic-gate 				 */
9610Sstevel@tonic-gate 				ASSERT(pire->ire_ptpn != NULL);
9620Sstevel@tonic-gate 				IRB_REFRELE(pire->ire_bucket);
9630Sstevel@tonic-gate 				ire_refrele(pire);
9640Sstevel@tonic-gate 			}
9650Sstevel@tonic-gate 			if (nce != NULL)
9660Sstevel@tonic-gate 				NCE_REFRELE_NOTR(nce);
9670Sstevel@tonic-gate 			*ire_p = NULL;
9680Sstevel@tonic-gate 			return (EINVAL);
9690Sstevel@tonic-gate 		} else {
9700Sstevel@tonic-gate 			ire->ire_nce = nce;
9710Sstevel@tonic-gate 		}
9720Sstevel@tonic-gate 		mutex_exit(&nce->nce_lock);
9730Sstevel@tonic-gate 	}
9740Sstevel@tonic-gate 	/*
9750Sstevel@tonic-gate 	 * Find the first entry that matches ire_addr - provides
9760Sstevel@tonic-gate 	 * tail insertion. *irep will be null if no match.
	 *
	 * The cast below uses the bucket itself as the address of the list
	 * head pointer - assumes irb_ire is the first member of irb_t
	 * (TODO confirm against the irb_t definition).
9770Sstevel@tonic-gate 	 */
9780Sstevel@tonic-gate 	irep = (ire_t **)irb_ptr;
9790Sstevel@tonic-gate 	while ((ire1 = *irep) != NULL &&
9800Sstevel@tonic-gate 	    !IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &ire1->ire_addr_v6))
9810Sstevel@tonic-gate 		irep = &ire1->ire_next;
9820Sstevel@tonic-gate 	ASSERT(!(ire->ire_type & IRE_BROADCAST));
9830Sstevel@tonic-gate 
9840Sstevel@tonic-gate 	if (*irep != NULL) {
9850Sstevel@tonic-gate 		/*
9860Sstevel@tonic-gate 		 * Find the last ire which matches ire_addr_v6.
9870Sstevel@tonic-gate 		 * Needed to do tail insertion among entries with the same
9880Sstevel@tonic-gate 		 * ire_addr_v6.
9890Sstevel@tonic-gate 		 */
9900Sstevel@tonic-gate 		while (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6,
9910Sstevel@tonic-gate 		    &ire1->ire_addr_v6)) {
9920Sstevel@tonic-gate 			irep = &ire1->ire_next;
9930Sstevel@tonic-gate 			ire1 = *irep;
9940Sstevel@tonic-gate 			if (ire1 == NULL)
9950Sstevel@tonic-gate 				break;
9960Sstevel@tonic-gate 		}
9970Sstevel@tonic-gate 	}
9980Sstevel@tonic-gate 
9990Sstevel@tonic-gate 	if (ire->ire_type == IRE_DEFAULT) {
10000Sstevel@tonic-gate 		/*
10010Sstevel@tonic-gate 		 * We keep a count of default gateways which is used when
10020Sstevel@tonic-gate 		 * assigning them as routes.
10030Sstevel@tonic-gate 		 */
10043448Sdh155122 		ipst->ips_ipv6_ire_default_count++;
10053448Sdh155122 		ASSERT(ipst->ips_ipv6_ire_default_count != 0); /* Wraparound */
10060Sstevel@tonic-gate 	}
10070Sstevel@tonic-gate 	/* Insert at *irep */
10080Sstevel@tonic-gate 	ire1 = *irep;
10090Sstevel@tonic-gate 	if (ire1 != NULL)
10100Sstevel@tonic-gate 		ire1->ire_ptpn = &ire->ire_next;
10110Sstevel@tonic-gate 	ire->ire_next = ire1;
10120Sstevel@tonic-gate 	/* Link the new one in. */
10130Sstevel@tonic-gate 	ire->ire_ptpn = irep;
10140Sstevel@tonic-gate 	/*
10150Sstevel@tonic-gate 	 * ire_walk routines de-reference ire_next without holding
10160Sstevel@tonic-gate 	 * a lock. Before we point to the new ire, we want to make
10170Sstevel@tonic-gate 	 * sure the store that sets the ire_next of the new ire
10180Sstevel@tonic-gate 	 * reaches global visibility, so that ire_walk routines
10190Sstevel@tonic-gate 	 * don't see a truncated list of ires i.e if the ire_next
10200Sstevel@tonic-gate 	 * of the new ire gets set after we do "*irep = ire" due
10210Sstevel@tonic-gate 	 * to re-ordering, the ire_walk thread will see a NULL
10220Sstevel@tonic-gate 	 * once it accesses the ire_next of the new ire.
10230Sstevel@tonic-gate 	 * membar_producer() makes sure that the following store
10240Sstevel@tonic-gate 	 * happens *after* all of the above stores.
10250Sstevel@tonic-gate 	 */
10260Sstevel@tonic-gate 	membar_producer();
10270Sstevel@tonic-gate 	*irep = ire;
10280Sstevel@tonic-gate 	ire->ire_bucket = irb_ptr;
10290Sstevel@tonic-gate 	/*
10300Sstevel@tonic-gate 	 * We return a bumped up IRE above. Keep it symmetrical
10310Sstevel@tonic-gate 	 * so that the callers will always have to release. This
10320Sstevel@tonic-gate 	 * helps the callers of this function because they continue
10330Sstevel@tonic-gate 	 * to use the IRE after adding and hence they don't have to
10340Sstevel@tonic-gate 	 * lookup again after we return the IRE.
10350Sstevel@tonic-gate 	 *
10360Sstevel@tonic-gate 	 * NOTE : We don't have to use atomics as this is appearing
10370Sstevel@tonic-gate 	 * in the list for the first time and no one else can bump
10380Sstevel@tonic-gate 	 * up the reference count on this yet.
10390Sstevel@tonic-gate 	 */
10400Sstevel@tonic-gate 	IRE_REFHOLD_LOCKED(ire);
10413448Sdh155122 	BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_inserted);
10420Sstevel@tonic-gate 	irb_ptr->irb_ire_cnt++;
10430Sstevel@tonic-gate 	if (ire->ire_marks & IRE_MARK_TEMPORARY)
10440Sstevel@tonic-gate 		irb_ptr->irb_tmp_ire_cnt++;
10450Sstevel@tonic-gate 
	/* Account for the new IRE on its ipif and on the ill behind ire_stq. */
10460Sstevel@tonic-gate 	if (ire->ire_ipif != NULL) {
10470Sstevel@tonic-gate 		ire->ire_ipif->ipif_ire_cnt++;
10480Sstevel@tonic-gate 		if (ire->ire_stq != NULL) {
10490Sstevel@tonic-gate 			stq_ill = (ill_t *)ire->ire_stq->q_ptr;
10500Sstevel@tonic-gate 			stq_ill->ill_ire_cnt++;
10510Sstevel@tonic-gate 		}
10520Sstevel@tonic-gate 	} else {
10530Sstevel@tonic-gate 		ASSERT(ire->ire_stq == NULL);
10540Sstevel@tonic-gate 	}
10550Sstevel@tonic-gate 
10560Sstevel@tonic-gate 	if (ndp_g_lock_held)
10573448Sdh155122 		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
10580Sstevel@tonic-gate 	ire_atomic_end(irb_ptr, ire);
10590Sstevel@tonic-gate 
10600Sstevel@tonic-gate 	if (pire != NULL) {
10610Sstevel@tonic-gate 		/* Assert that it is not removed from the list yet */
10620Sstevel@tonic-gate 		ASSERT(pire->ire_ptpn != NULL);
10630Sstevel@tonic-gate 		IRB_REFRELE(pire->ire_bucket);
10640Sstevel@tonic-gate 		ire_refrele(pire);
10650Sstevel@tonic-gate 	}
10660Sstevel@tonic-gate 
10670Sstevel@tonic-gate 	if (ire->ire_type != IRE_CACHE) {
10680Sstevel@tonic-gate 		/*
10690Sstevel@tonic-gate 		 * For ire's with a host mask see if there is an entry
10700Sstevel@tonic-gate 		 * in the cache. If there is one flush the whole cache as
10710Sstevel@tonic-gate 		 * there might be multiple entries due to RTF_MULTIRT (CGTP).
10720Sstevel@tonic-gate 		 * If no entry is found then there is no need to flush the
10730Sstevel@tonic-gate 		 * cache.
10740Sstevel@tonic-gate 		 */
10750Sstevel@tonic-gate 
10760Sstevel@tonic-gate 		if (ip_mask_to_plen_v6(&ire->ire_mask_v6) == IPV6_ABITS) {
10770Sstevel@tonic-gate 			ire_t *lire;
10780Sstevel@tonic-gate 			lire = ire_ctable_lookup_v6(&ire->ire_addr_v6, NULL,
10793448Sdh155122 			    IRE_CACHE, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE,
10803448Sdh155122 			    ipst);
10810Sstevel@tonic-gate 			if (lire != NULL) {
10820Sstevel@tonic-gate 				ire_refrele(lire);
10830Sstevel@tonic-gate 				ire_flush_cache_v6(ire, IRE_FLUSH_ADD);
10840Sstevel@tonic-gate 			}
10850Sstevel@tonic-gate 		} else {
10860Sstevel@tonic-gate 			ire_flush_cache_v6(ire, IRE_FLUSH_ADD);
10870Sstevel@tonic-gate 		}
10880Sstevel@tonic-gate 	}
10890Sstevel@tonic-gate 
10900Sstevel@tonic-gate 	*ire_p = ire;
10910Sstevel@tonic-gate 	return (0);
10920Sstevel@tonic-gate }
10930Sstevel@tonic-gate 
10940Sstevel@tonic-gate /*
10950Sstevel@tonic-gate  * Search for all HOST REDIRECT routes that are
10960Sstevel@tonic-gate  * pointing at the specified gateway and
10970Sstevel@tonic-gate  * delete them. This routine is called only
10980Sstevel@tonic-gate  * when a default gateway is going away.
10990Sstevel@tonic-gate  */
11000Sstevel@tonic-gate static void
11013448Sdh155122 ire_delete_host_redirects_v6(const in6_addr_t *gateway, ip_stack_t *ipst)
11020Sstevel@tonic-gate {
11030Sstevel@tonic-gate 	irb_t *irb_ptr;
11040Sstevel@tonic-gate 	irb_t *irb;
11050Sstevel@tonic-gate 	ire_t *ire;
11060Sstevel@tonic-gate 	in6_addr_t gw_addr_v6;
11070Sstevel@tonic-gate 	int i;
11080Sstevel@tonic-gate 
11090Sstevel@tonic-gate 	/* get the hash table for HOST routes */
11103448Sdh155122 	irb_ptr = ipst->ips_ip_forwarding_table_v6[(IP6_MASK_TABLE_SIZE - 1)];
11110Sstevel@tonic-gate 	if (irb_ptr == NULL)
11120Sstevel@tonic-gate 		return;
11133448Sdh155122 	for (i = 0; (i < ipst->ips_ip6_ftable_hash_size); i++) {
11140Sstevel@tonic-gate 		irb = &irb_ptr[i];
11150Sstevel@tonic-gate 		IRB_REFHOLD(irb);
11160Sstevel@tonic-gate 		for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
11173004Sdd193516 			if (!(ire->ire_flags & RTF_DYNAMIC))
11180Sstevel@tonic-gate 				continue;
11190Sstevel@tonic-gate 			mutex_enter(&ire->ire_lock);
11200Sstevel@tonic-gate 			gw_addr_v6 = ire->ire_gateway_addr_v6;
11210Sstevel@tonic-gate 			mutex_exit(&ire->ire_lock);
11220Sstevel@tonic-gate 			if (IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway))
11230Sstevel@tonic-gate 				ire_delete(ire);
11240Sstevel@tonic-gate 		}
11250Sstevel@tonic-gate 		IRB_REFRELE(irb);
11260Sstevel@tonic-gate 	}
11270Sstevel@tonic-gate }
11280Sstevel@tonic-gate 
11290Sstevel@tonic-gate /*
11300Sstevel@tonic-gate  * Delete all the cache entries with this 'addr'. This is the IPv6 counterpart
11310Sstevel@tonic-gate  * of ip_ire_clookup_and_delete. The difference being this function does not
11320Sstevel@tonic-gate  * return any value. IPv6 processing of a gratuitous ARP, as it stands, is
11330Sstevel@tonic-gate  * different than IPv4 in that, regardless of the presence of a cache entry
11340Sstevel@tonic-gate  * for this address, an ire_walk_v6 is done. Another difference is that unlike
11350Sstevel@tonic-gate  * in the case of IPv4 this does not take an ipif_t argument, since it is only
11360Sstevel@tonic-gate  * called by ip_arp_news and the match is always only on the address.
11370Sstevel@tonic-gate  */
11380Sstevel@tonic-gate void
11393448Sdh155122 ip_ire_clookup_and_delete_v6(const in6_addr_t *addr, ip_stack_t *ipst)
11400Sstevel@tonic-gate {
11410Sstevel@tonic-gate 	irb_t		*irb;
11420Sstevel@tonic-gate 	ire_t		*cire;
11430Sstevel@tonic-gate 	boolean_t	found = B_FALSE;
11440Sstevel@tonic-gate 
11453448Sdh155122 	irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr,
11463448Sdh155122 					    ipst->ips_ip6_cache_table_size)];
11470Sstevel@tonic-gate 	IRB_REFHOLD(irb);
11480Sstevel@tonic-gate 	for (cire = irb->irb_ire; cire != NULL; cire = cire->ire_next) {
11493448Sdh155122 		if (cire->ire_marks & IRE_MARK_CONDEMNED)
11500Sstevel@tonic-gate 			continue;
11510Sstevel@tonic-gate 		if (IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, addr)) {
11520Sstevel@tonic-gate 
11530Sstevel@tonic-gate 			/* This signifies start of a match */
11540Sstevel@tonic-gate 			if (!found)
11550Sstevel@tonic-gate 				found = B_TRUE;
11560Sstevel@tonic-gate 			if (cire->ire_type == IRE_CACHE) {
11570Sstevel@tonic-gate 				if (cire->ire_nce != NULL)
11580Sstevel@tonic-gate 					ndp_delete(cire->ire_nce);
11590Sstevel@tonic-gate 				ire_delete_v6(cire);
11600Sstevel@tonic-gate 			}
11610Sstevel@tonic-gate 		/* End of the match */
11620Sstevel@tonic-gate 		} else if (found)
11630Sstevel@tonic-gate 			break;
11640Sstevel@tonic-gate 	}
11650Sstevel@tonic-gate 	IRB_REFRELE(irb);
11660Sstevel@tonic-gate }
11670Sstevel@tonic-gate 
11680Sstevel@tonic-gate /*
11690Sstevel@tonic-gate  * Delete the specified IRE.
11700Sstevel@tonic-gate  * All calls should use ire_delete().
11710Sstevel@tonic-gate  * Sometimes called as writer though not required by this function.
11720Sstevel@tonic-gate  *
11730Sstevel@tonic-gate  * NOTE : This function is called only if the ire was added
11740Sstevel@tonic-gate  * in the list.
11750Sstevel@tonic-gate  */
11760Sstevel@tonic-gate void
11770Sstevel@tonic-gate ire_delete_v6(ire_t *ire)
11780Sstevel@tonic-gate {
11790Sstevel@tonic-gate 	in6_addr_t gw_addr_v6;
11803448Sdh155122 	ip_stack_t	*ipst = ire->ire_ipst;
11810Sstevel@tonic-gate 
11820Sstevel@tonic-gate 	ASSERT(ire->ire_refcnt >= 1);
11830Sstevel@tonic-gate 	ASSERT(ire->ire_ipversion == IPV6_VERSION);
11840Sstevel@tonic-gate 
11850Sstevel@tonic-gate 	if (ire->ire_type != IRE_CACHE)
11860Sstevel@tonic-gate 		ire_flush_cache_v6(ire, IRE_FLUSH_DELETE);
11870Sstevel@tonic-gate 	if (ire->ire_type == IRE_DEFAULT) {
11880Sstevel@tonic-gate 		/*
11890Sstevel@tonic-gate 		 * when a default gateway is going away
11900Sstevel@tonic-gate 		 * delete all the host redirects pointing at that
11910Sstevel@tonic-gate 		 * gateway.
11920Sstevel@tonic-gate 		 */
11930Sstevel@tonic-gate 		mutex_enter(&ire->ire_lock);
11940Sstevel@tonic-gate 		gw_addr_v6 = ire->ire_gateway_addr_v6;
11950Sstevel@tonic-gate 		mutex_exit(&ire->ire_lock);
11963448Sdh155122 		ire_delete_host_redirects_v6(&gw_addr_v6, ipst);
11970Sstevel@tonic-gate 	}
11980Sstevel@tonic-gate }
11990Sstevel@tonic-gate 
12000Sstevel@tonic-gate /*
12013004Sdd193516  * ire_walk routine to delete all IRE_CACHE and IRE_HOST type redirect
12020Sstevel@tonic-gate  * entries.
12030Sstevel@tonic-gate  */
12040Sstevel@tonic-gate /*ARGSUSED1*/
12050Sstevel@tonic-gate void
12060Sstevel@tonic-gate ire_delete_cache_v6(ire_t *ire, char *arg)
12070Sstevel@tonic-gate {
12080Sstevel@tonic-gate 	char    addrstr1[INET6_ADDRSTRLEN];
12090Sstevel@tonic-gate 	char    addrstr2[INET6_ADDRSTRLEN];
12100Sstevel@tonic-gate 
12113004Sdd193516 	if ((ire->ire_type & IRE_CACHE) ||
12123004Sdd193516 	    (ire->ire_flags & RTF_DYNAMIC)) {
12130Sstevel@tonic-gate 		ip1dbg(("ire_delete_cache_v6: deleted %s type %d through %s\n",
12140Sstevel@tonic-gate 		    inet_ntop(AF_INET6, &ire->ire_addr_v6,
12150Sstevel@tonic-gate 			addrstr1, sizeof (addrstr1)),
12160Sstevel@tonic-gate 		    ire->ire_type,
12170Sstevel@tonic-gate 		    inet_ntop(AF_INET6, &ire->ire_gateway_addr_v6,
12180Sstevel@tonic-gate 			addrstr2, sizeof (addrstr2))));
12190Sstevel@tonic-gate 		ire_delete(ire);
12200Sstevel@tonic-gate 	}
12210Sstevel@tonic-gate 
12220Sstevel@tonic-gate }
12230Sstevel@tonic-gate 
12240Sstevel@tonic-gate /*
12253004Sdd193516  * ire_walk routine to delete all IRE_CACHE/IRE_HOST type redirect entries
12260Sstevel@tonic-gate  * that have a given gateway address.
12270Sstevel@tonic-gate  */
12280Sstevel@tonic-gate void
12290Sstevel@tonic-gate ire_delete_cache_gw_v6(ire_t *ire, char *addr)
12300Sstevel@tonic-gate {
12310Sstevel@tonic-gate 	in6_addr_t	*gw_addr = (in6_addr_t *)addr;
12320Sstevel@tonic-gate 	char		buf1[INET6_ADDRSTRLEN];
12330Sstevel@tonic-gate 	char		buf2[INET6_ADDRSTRLEN];
12340Sstevel@tonic-gate 	in6_addr_t	ire_gw_addr_v6;
12350Sstevel@tonic-gate 
12363004Sdd193516 	if (!(ire->ire_type & IRE_CACHE) &&
12373004Sdd193516 	    !(ire->ire_flags & RTF_DYNAMIC))
12380Sstevel@tonic-gate 		return;
12390Sstevel@tonic-gate 
12400Sstevel@tonic-gate 	mutex_enter(&ire->ire_lock);
12410Sstevel@tonic-gate 	ire_gw_addr_v6 = ire->ire_gateway_addr_v6;
12420Sstevel@tonic-gate 	mutex_exit(&ire->ire_lock);
12430Sstevel@tonic-gate 
12440Sstevel@tonic-gate 	if (IN6_ARE_ADDR_EQUAL(&ire_gw_addr_v6, gw_addr)) {
12450Sstevel@tonic-gate 		ip1dbg(("ire_delete_cache_gw_v6: deleted %s type %d to %s\n",
12460Sstevel@tonic-gate 		    inet_ntop(AF_INET6, &ire->ire_src_addr_v6,
12470Sstevel@tonic-gate 		    buf1, sizeof (buf1)),
12480Sstevel@tonic-gate 		    ire->ire_type,
12490Sstevel@tonic-gate 		    inet_ntop(AF_INET6, &ire_gw_addr_v6,
12500Sstevel@tonic-gate 		    buf2, sizeof (buf2))));
12510Sstevel@tonic-gate 		ire_delete(ire);
12520Sstevel@tonic-gate 	}
12530Sstevel@tonic-gate }
12540Sstevel@tonic-gate 
12550Sstevel@tonic-gate /*
12560Sstevel@tonic-gate  * Remove all IRE_CACHE entries that match
12570Sstevel@tonic-gate  * the ire specified.  (Sometimes called
12580Sstevel@tonic-gate  * as writer though not required by this function.)
12590Sstevel@tonic-gate  *
12600Sstevel@tonic-gate  * The flag argument indicates if the
12610Sstevel@tonic-gate  * flush request is due to addition
12620Sstevel@tonic-gate  * of new route (IRE_FLUSH_ADD) or deletion of old
12630Sstevel@tonic-gate  * route (IRE_FLUSH_DELETE).
12640Sstevel@tonic-gate  *
12650Sstevel@tonic-gate  * This routine takes only the IREs from the forwarding
12660Sstevel@tonic-gate  * table and flushes the corresponding entries from
12670Sstevel@tonic-gate  * the cache table.
12680Sstevel@tonic-gate  *
12690Sstevel@tonic-gate  * When flushing due to the deletion of an old route, it
12700Sstevel@tonic-gate  * just checks the cache handles (ire_phandle and ire_ihandle) and
12710Sstevel@tonic-gate  * deletes the ones that match.
12720Sstevel@tonic-gate  *
12730Sstevel@tonic-gate  * When flushing due to the creation of a new route, it checks
12740Sstevel@tonic-gate  * if a cache entry's address matches the one in the IRE and
12750Sstevel@tonic-gate  * that the cache entry's parent has a less specific mask than the
12760Sstevel@tonic-gate  * one in IRE. The destination of such a cache entry could be the
12770Sstevel@tonic-gate  * gateway for other cache entries, so we need to flush those as
12780Sstevel@tonic-gate  * well by looking for gateway addresses matching the IRE's address.
12790Sstevel@tonic-gate  */
12800Sstevel@tonic-gate void
12810Sstevel@tonic-gate ire_flush_cache_v6(ire_t *ire, int flag)
12820Sstevel@tonic-gate {
12830Sstevel@tonic-gate 	int i;
12840Sstevel@tonic-gate 	ire_t *cire;
12850Sstevel@tonic-gate 	irb_t *irb;
12863448Sdh155122 	ip_stack_t	*ipst = ire->ire_ipst;
12870Sstevel@tonic-gate 
12880Sstevel@tonic-gate 	if (ire->ire_type & IRE_CACHE)
12890Sstevel@tonic-gate 	    return;
12900Sstevel@tonic-gate 
12910Sstevel@tonic-gate 	/*
12920Sstevel@tonic-gate 	 * If a default is just created, there is no point
12930Sstevel@tonic-gate 	 * in going through the cache, as there will not be any
12940Sstevel@tonic-gate 	 * cached ires.
12950Sstevel@tonic-gate 	 */
12960Sstevel@tonic-gate 	if (ire->ire_type == IRE_DEFAULT && flag == IRE_FLUSH_ADD)
12970Sstevel@tonic-gate 		return;
12980Sstevel@tonic-gate 	if (flag == IRE_FLUSH_ADD) {
12990Sstevel@tonic-gate 		/*
13000Sstevel@tonic-gate 		 * This selective flush is
13010Sstevel@tonic-gate 		 * due to the addition of
13020Sstevel@tonic-gate 		 * new IRE.
13030Sstevel@tonic-gate 		 */
13043448Sdh155122 		for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) {
13053448Sdh155122 			irb = &ipst->ips_ip_cache_table_v6[i];
13060Sstevel@tonic-gate 			if ((cire = irb->irb_ire) == NULL)
13070Sstevel@tonic-gate 				continue;
13080Sstevel@tonic-gate 			IRB_REFHOLD(irb);
13090Sstevel@tonic-gate 			for (cire = irb->irb_ire; cire != NULL;
13100Sstevel@tonic-gate 			    cire = cire->ire_next) {
13110Sstevel@tonic-gate 				if (cire->ire_type != IRE_CACHE)
13120Sstevel@tonic-gate 					continue;
13130Sstevel@tonic-gate 				/*
13140Sstevel@tonic-gate 				 * If 'cire' belongs to the same subnet
13150Sstevel@tonic-gate 				 * as the new ire being added, and 'cire'
13160Sstevel@tonic-gate 				 * is derived from a prefix that is less
13170Sstevel@tonic-gate 				 * specific than the new ire being added,
13180Sstevel@tonic-gate 				 * we need to flush 'cire'; for instance,
13190Sstevel@tonic-gate 				 * when a new interface comes up.
13200Sstevel@tonic-gate 				 */
13210Sstevel@tonic-gate 				if ((V6_MASK_EQ_2(cire->ire_addr_v6,
13220Sstevel@tonic-gate 				    ire->ire_mask_v6, ire->ire_addr_v6) &&
13230Sstevel@tonic-gate 				    (ip_mask_to_plen_v6(&cire->ire_cmask_v6) <=
13240Sstevel@tonic-gate 				    ire->ire_masklen))) {
13250Sstevel@tonic-gate 					ire_delete(cire);
13260Sstevel@tonic-gate 					continue;
13270Sstevel@tonic-gate 				}
13280Sstevel@tonic-gate 				/*
13290Sstevel@tonic-gate 				 * This is the case when the ire_gateway_addr
13300Sstevel@tonic-gate 				 * of 'cire' belongs to the same subnet as
13310Sstevel@tonic-gate 				 * the new ire being added.
13320Sstevel@tonic-gate 				 * Flushing such ires is sometimes required to
13330Sstevel@tonic-gate 				 * avoid misrouting: say we have a machine with
13340Sstevel@tonic-gate 				 * two interfaces (I1 and I2), a default router
13350Sstevel@tonic-gate 				 * R on the I1 subnet, and a host route to an
13360Sstevel@tonic-gate 				 * off-link destination D with a gateway G on
13370Sstevel@tonic-gate 				 * the I2 subnet.
13380Sstevel@tonic-gate 				 * Under normal operation, we will have an
13390Sstevel@tonic-gate 				 * on-link cache entry for G and an off-link
13400Sstevel@tonic-gate 				 * cache entry for D with G as ire_gateway_addr,
13410Sstevel@tonic-gate 				 * traffic to D will reach its destination
13420Sstevel@tonic-gate 				 * through gateway G.
13430Sstevel@tonic-gate 				 * If the administrator does 'ifconfig I2 down',
13440Sstevel@tonic-gate 				 * the cache entries for D and G will be
13450Sstevel@tonic-gate 				 * flushed. However, G will now be resolved as
13460Sstevel@tonic-gate 				 * an off-link destination using R (the default
13470Sstevel@tonic-gate 				 * router) as gateway. Then D will also be
13480Sstevel@tonic-gate 				 * resolved as an off-link destination using G
13490Sstevel@tonic-gate 				 * as gateway - this behavior is due to
13500Sstevel@tonic-gate 				 * compatibility reasons, see comment in
13510Sstevel@tonic-gate 				 * ire_ihandle_lookup_offlink(). Traffic to D
13520Sstevel@tonic-gate 				 * will go to the router R and probably won't
13530Sstevel@tonic-gate 				 * reach the destination.
13540Sstevel@tonic-gate 				 * The administrator then does 'ifconfig I2 up'.
13550Sstevel@tonic-gate 				 * Since G is on the I2 subnet, this routine
13560Sstevel@tonic-gate 				 * will flush its cache entry. It must also
13570Sstevel@tonic-gate 				 * flush the cache entry for D, otherwise
13580Sstevel@tonic-gate 				 * traffic will stay misrouted until the IRE
13590Sstevel@tonic-gate 				 * times out.
13600Sstevel@tonic-gate 				 */
13610Sstevel@tonic-gate 				if (V6_MASK_EQ_2(cire->ire_gateway_addr_v6,
13620Sstevel@tonic-gate 				    ire->ire_mask_v6, ire->ire_addr_v6)) {
13630Sstevel@tonic-gate 					ire_delete(cire);
13640Sstevel@tonic-gate 					continue;
13650Sstevel@tonic-gate 				}
13660Sstevel@tonic-gate 			}
13670Sstevel@tonic-gate 			IRB_REFRELE(irb);
13680Sstevel@tonic-gate 		}
13690Sstevel@tonic-gate 	} else {
13700Sstevel@tonic-gate 		/*
13710Sstevel@tonic-gate 		 * delete the cache entries based on
13720Sstevel@tonic-gate 		 * handle in the IRE as this IRE is
13730Sstevel@tonic-gate 		 * being deleted/changed.
13740Sstevel@tonic-gate 		 */
13753448Sdh155122 		for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) {
13763448Sdh155122 			irb = &ipst->ips_ip_cache_table_v6[i];
13770Sstevel@tonic-gate 			if ((cire = irb->irb_ire) == NULL)
13780Sstevel@tonic-gate 				continue;
13790Sstevel@tonic-gate 			IRB_REFHOLD(irb);
13800Sstevel@tonic-gate 			for (cire = irb->irb_ire; cire != NULL;
13810Sstevel@tonic-gate 			    cire = cire->ire_next) {
13820Sstevel@tonic-gate 				if (cire->ire_type != IRE_CACHE)
13830Sstevel@tonic-gate 					continue;
13840Sstevel@tonic-gate 				if ((cire->ire_phandle == 0 ||
13850Sstevel@tonic-gate 				    cire->ire_phandle != ire->ire_phandle) &&
13860Sstevel@tonic-gate 				    (cire->ire_ihandle == 0 ||
13870Sstevel@tonic-gate 				    cire->ire_ihandle != ire->ire_ihandle))
13880Sstevel@tonic-gate 					continue;
13890Sstevel@tonic-gate 				ire_delete(cire);
13900Sstevel@tonic-gate 			}
13910Sstevel@tonic-gate 			IRB_REFRELE(irb);
13920Sstevel@tonic-gate 		}
13930Sstevel@tonic-gate 	}
13940Sstevel@tonic-gate }
13950Sstevel@tonic-gate 
13960Sstevel@tonic-gate /*
13970Sstevel@tonic-gate  * Matches the arguments passed with the values in the ire.
13980Sstevel@tonic-gate  *
13990Sstevel@tonic-gate  * Note: for match types that match using "ipif" passed in, ipif
14000Sstevel@tonic-gate  * must be checked for non-NULL before calling this routine.
14010Sstevel@tonic-gate  */
14020Sstevel@tonic-gate static boolean_t
14030Sstevel@tonic-gate ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, const in6_addr_t *mask,
14041676Sjpk     const in6_addr_t *gateway, int type, const ipif_t *ipif, zoneid_t zoneid,
14051676Sjpk     uint32_t ihandle, const ts_label_t *tsl, int match_flags)
14060Sstevel@tonic-gate {
14070Sstevel@tonic-gate 	in6_addr_t masked_addr;
14080Sstevel@tonic-gate 	in6_addr_t gw_addr_v6;
14090Sstevel@tonic-gate 	ill_t *ire_ill = NULL, *dst_ill;
14100Sstevel@tonic-gate 	ill_t *ipif_ill = NULL;
14110Sstevel@tonic-gate 	ill_group_t *ire_ill_group = NULL;
14120Sstevel@tonic-gate 	ill_group_t *ipif_ill_group = NULL;
14130Sstevel@tonic-gate 	ipif_t	*src_ipif;
14140Sstevel@tonic-gate 
14150Sstevel@tonic-gate 	ASSERT(ire->ire_ipversion == IPV6_VERSION);
14160Sstevel@tonic-gate 	ASSERT(addr != NULL);
14170Sstevel@tonic-gate 	ASSERT(mask != NULL);
14180Sstevel@tonic-gate 	ASSERT((!(match_flags & MATCH_IRE_GW)) || gateway != NULL);
14190Sstevel@tonic-gate 	ASSERT((!(match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP))) ||
14200Sstevel@tonic-gate 	    (ipif != NULL && ipif->ipif_isv6));
14210Sstevel@tonic-gate 	ASSERT(!(match_flags & MATCH_IRE_WQ));
14220Sstevel@tonic-gate 
14230Sstevel@tonic-gate 	/*
14240Sstevel@tonic-gate 	 * HIDDEN cache entries have to be looked up specifically with
14250Sstevel@tonic-gate 	 * MATCH_IRE_MARK_HIDDEN. MATCH_IRE_MARK_HIDDEN is usually set
14260Sstevel@tonic-gate 	 * when the interface is FAILED or INACTIVE. In that case,
14270Sstevel@tonic-gate 	 * any IRE_CACHES that exists should be marked with
14280Sstevel@tonic-gate 	 * IRE_MARK_HIDDEN. So, we don't really need to match below
14290Sstevel@tonic-gate 	 * for IRE_MARK_HIDDEN. But we do so for consistency.
14300Sstevel@tonic-gate 	 */
14310Sstevel@tonic-gate 	if (!(match_flags & MATCH_IRE_MARK_HIDDEN) &&
14320Sstevel@tonic-gate 	    (ire->ire_marks & IRE_MARK_HIDDEN))
14330Sstevel@tonic-gate 		return (B_FALSE);
14340Sstevel@tonic-gate 
14351676Sjpk 	if (zoneid != ALL_ZONES && zoneid != ire->ire_zoneid &&
14361676Sjpk 	    ire->ire_zoneid != ALL_ZONES) {
14370Sstevel@tonic-gate 		/*
14380Sstevel@tonic-gate 		 * If MATCH_IRE_ZONEONLY has been set and the supplied zoneid is
14390Sstevel@tonic-gate 		 * valid and does not match that of ire_zoneid, a failure to
14400Sstevel@tonic-gate 		 * match is reported at this point. Otherwise, since some IREs
14410Sstevel@tonic-gate 		 * that are available in the global zone can be used in local
14420Sstevel@tonic-gate 		 * zones, additional checks need to be performed:
14430Sstevel@tonic-gate 		 *
14440Sstevel@tonic-gate 		 *	IRE_CACHE and IRE_LOOPBACK entries should
14450Sstevel@tonic-gate 		 *	never be matched in this situation.
14460Sstevel@tonic-gate 		 *
14470Sstevel@tonic-gate 		 *	IRE entries that have an interface associated with them
14480Sstevel@tonic-gate 		 *	should in general not match unless they are an IRE_LOCAL
14490Sstevel@tonic-gate 		 *	or in the case when MATCH_IRE_DEFAULT has been set in
14500Sstevel@tonic-gate 		 *	the caller.  In the case of the former, checking of the
14510Sstevel@tonic-gate 		 *	other fields supplied should take place.
14520Sstevel@tonic-gate 		 *
14530Sstevel@tonic-gate 		 *	In the case where MATCH_IRE_DEFAULT has been set,
14540Sstevel@tonic-gate 		 *	all of the ipif's associated with the IRE's ill are
14550Sstevel@tonic-gate 		 *	checked to see if there is a matching zoneid.  If any
14560Sstevel@tonic-gate 		 *	one ipif has a matching zoneid, this IRE is a
14570Sstevel@tonic-gate 		 *	potential candidate so checking of the other fields
14580Sstevel@tonic-gate 		 *	takes place.
14590Sstevel@tonic-gate 		 *
14600Sstevel@tonic-gate 		 *	In the case where the IRE_INTERFACE has a usable source
14610Sstevel@tonic-gate 		 *	address (indicated by ill_usesrc_ifindex) in the
14620Sstevel@tonic-gate 		 *	correct zone then it's permitted to return this IRE
14630Sstevel@tonic-gate 		 */
14640Sstevel@tonic-gate 		if (match_flags & MATCH_IRE_ZONEONLY)
14650Sstevel@tonic-gate 			return (B_FALSE);
14660Sstevel@tonic-gate 		if (ire->ire_type & (IRE_CACHE | IRE_LOOPBACK))
14670Sstevel@tonic-gate 			return (B_FALSE);
14680Sstevel@tonic-gate 		/*
14690Sstevel@tonic-gate 		 * Note, IRE_INTERFACE can have the stq as NULL. For
14700Sstevel@tonic-gate 		 * example, if the default multicast route is tied to
14710Sstevel@tonic-gate 		 * the loopback address.
14720Sstevel@tonic-gate 		 */
14730Sstevel@tonic-gate 		if ((ire->ire_type & IRE_INTERFACE) &&
14740Sstevel@tonic-gate 		    (ire->ire_stq != NULL)) {
14750Sstevel@tonic-gate 			dst_ill = (ill_t *)ire->ire_stq->q_ptr;
14760Sstevel@tonic-gate 			/*
14770Sstevel@tonic-gate 			 * If there is a usable source address in the
14780Sstevel@tonic-gate 			 * zone, then it's ok to return an
14790Sstevel@tonic-gate 			 * IRE_INTERFACE
14800Sstevel@tonic-gate 			 */
14810Sstevel@tonic-gate 			if ((dst_ill->ill_usesrc_ifindex != 0) &&
14820Sstevel@tonic-gate 			    (src_ipif = ipif_select_source_v6(dst_ill, addr,
14832202Srk129064 			    RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid))
14840Sstevel@tonic-gate 			    != NULL) {
14850Sstevel@tonic-gate 				ip3dbg(("ire_match_args: src_ipif %p"
14860Sstevel@tonic-gate 				    " dst_ill %p", (void *)src_ipif,
14870Sstevel@tonic-gate 				    (void *)dst_ill));
14880Sstevel@tonic-gate 				ipif_refrele(src_ipif);
14890Sstevel@tonic-gate 			} else {
14900Sstevel@tonic-gate 				ip3dbg(("ire_match_args: src_ipif NULL"
14910Sstevel@tonic-gate 				    " dst_ill %p\n", (void *)dst_ill));
14920Sstevel@tonic-gate 				return (B_FALSE);
14930Sstevel@tonic-gate 			}
14940Sstevel@tonic-gate 		}
14950Sstevel@tonic-gate 		if (ire->ire_ipif != NULL && ire->ire_type != IRE_LOCAL &&
14960Sstevel@tonic-gate 		    !(ire->ire_type & IRE_INTERFACE)) {
14970Sstevel@tonic-gate 			ipif_t	*tipif;
14980Sstevel@tonic-gate 
14990Sstevel@tonic-gate 			if ((match_flags & MATCH_IRE_DEFAULT) == 0)
15000Sstevel@tonic-gate 				return (B_FALSE);
15010Sstevel@tonic-gate 			mutex_enter(&ire->ire_ipif->ipif_ill->ill_lock);
15020Sstevel@tonic-gate 			for (tipif = ire->ire_ipif->ipif_ill->ill_ipif;
15030Sstevel@tonic-gate 			    tipif != NULL; tipif = tipif->ipif_next) {
15040Sstevel@tonic-gate 				if (IPIF_CAN_LOOKUP(tipif) &&
15050Sstevel@tonic-gate 				    (tipif->ipif_flags & IPIF_UP) &&
15061676Sjpk 				    (tipif->ipif_zoneid == zoneid ||
15071676Sjpk 				    tipif->ipif_zoneid == ALL_ZONES))
15080Sstevel@tonic-gate 					break;
15090Sstevel@tonic-gate 			}
15100Sstevel@tonic-gate 			mutex_exit(&ire->ire_ipif->ipif_ill->ill_lock);
15110Sstevel@tonic-gate 			if (tipif == NULL)
15120Sstevel@tonic-gate 				return (B_FALSE);
15130Sstevel@tonic-gate 		}
15140Sstevel@tonic-gate 	}
15150Sstevel@tonic-gate 
15160Sstevel@tonic-gate 	if (match_flags & MATCH_IRE_GW) {
15170Sstevel@tonic-gate 		mutex_enter(&ire->ire_lock);
15180Sstevel@tonic-gate 		gw_addr_v6 = ire->ire_gateway_addr_v6;
15190Sstevel@tonic-gate 		mutex_exit(&ire->ire_lock);
15200Sstevel@tonic-gate 	}
15210Sstevel@tonic-gate 	/*
15220Sstevel@tonic-gate 	 * For IRE_CACHES, MATCH_IRE_ILL/ILL_GROUP really means that
15230Sstevel@tonic-gate 	 * somebody wants to send out on a particular interface which
15240Sstevel@tonic-gate 	 * is given by ire_stq and hence use ire_stq to derive the ill
15250Sstevel@tonic-gate 	 * value. ire_ipif for IRE_CACHES is just the
15260Sstevel@tonic-gate 	 * means of getting a source address i.e ire_src_addr_v6 =
15270Sstevel@tonic-gate 	 * ire->ire_ipif->ipif_src_addr_v6.
15280Sstevel@tonic-gate 	 */
15290Sstevel@tonic-gate 	if (match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP)) {
15300Sstevel@tonic-gate 		ire_ill = ire_to_ill(ire);
15310Sstevel@tonic-gate 		if (ire_ill != NULL)
15320Sstevel@tonic-gate 			ire_ill_group = ire_ill->ill_group;
15330Sstevel@tonic-gate 		ipif_ill = ipif->ipif_ill;
15340Sstevel@tonic-gate 		ipif_ill_group = ipif_ill->ill_group;
15350Sstevel@tonic-gate 	}
15360Sstevel@tonic-gate 
15370Sstevel@tonic-gate 	/* No ire_addr_v6 bits set past the mask */
15380Sstevel@tonic-gate 	ASSERT(V6_MASK_EQ(ire->ire_addr_v6, ire->ire_mask_v6,
15390Sstevel@tonic-gate 	    ire->ire_addr_v6));
15400Sstevel@tonic-gate 	V6_MASK_COPY(*addr, *mask, masked_addr);
15410Sstevel@tonic-gate 
15420Sstevel@tonic-gate 	if (V6_MASK_EQ(*addr, *mask, ire->ire_addr_v6) &&
15430Sstevel@tonic-gate 	    ((!(match_flags & MATCH_IRE_GW)) ||
15440Sstevel@tonic-gate 		IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) &&
15450Sstevel@tonic-gate 	    ((!(match_flags & MATCH_IRE_TYPE)) ||
15460Sstevel@tonic-gate 		(ire->ire_type & type)) &&
15470Sstevel@tonic-gate 	    ((!(match_flags & MATCH_IRE_SRC)) ||
15480Sstevel@tonic-gate 		IN6_ARE_ADDR_EQUAL(&ire->ire_src_addr_v6,
15490Sstevel@tonic-gate 		&ipif->ipif_v6src_addr)) &&
15500Sstevel@tonic-gate 	    ((!(match_flags & MATCH_IRE_IPIF)) ||
15510Sstevel@tonic-gate 		(ire->ire_ipif == ipif)) &&
15520Sstevel@tonic-gate 	    ((!(match_flags & MATCH_IRE_MARK_HIDDEN)) ||
15530Sstevel@tonic-gate 		(ire->ire_type != IRE_CACHE ||
15540Sstevel@tonic-gate 		ire->ire_marks & IRE_MARK_HIDDEN)) &&
15550Sstevel@tonic-gate 	    ((!(match_flags & MATCH_IRE_ILL)) ||
15560Sstevel@tonic-gate 		(ire_ill == ipif_ill)) &&
15570Sstevel@tonic-gate 	    ((!(match_flags & MATCH_IRE_IHANDLE)) ||
15580Sstevel@tonic-gate 		(ire->ire_ihandle == ihandle)) &&
15590Sstevel@tonic-gate 	    ((!(match_flags & MATCH_IRE_ILL_GROUP)) ||
15600Sstevel@tonic-gate 		(ire_ill == ipif_ill) ||
15610Sstevel@tonic-gate 		(ire_ill_group != NULL &&
15621676Sjpk 		ire_ill_group == ipif_ill_group)) &&
15631676Sjpk 	    ((!(match_flags & MATCH_IRE_SECATTR)) ||
15641676Sjpk 		(!is_system_labeled()) ||
15651676Sjpk 		(tsol_ire_match_gwattr(ire, tsl) == 0))) {
15660Sstevel@tonic-gate 		/* We found the matched IRE */
15670Sstevel@tonic-gate 		return (B_TRUE);
15680Sstevel@tonic-gate 	}
15690Sstevel@tonic-gate 	return (B_FALSE);
15700Sstevel@tonic-gate }
15710Sstevel@tonic-gate 
15720Sstevel@tonic-gate /*
15730Sstevel@tonic-gate  * Lookup for a route in all the tables
15740Sstevel@tonic-gate  */
15750Sstevel@tonic-gate ire_t *
15760Sstevel@tonic-gate ire_route_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask,
15771676Sjpk     const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire,
15783448Sdh155122     zoneid_t zoneid, const ts_label_t *tsl, int flags, ip_stack_t *ipst)
15790Sstevel@tonic-gate {
15800Sstevel@tonic-gate 	ire_t *ire = NULL;
15810Sstevel@tonic-gate 
15820Sstevel@tonic-gate 	/*
15830Sstevel@tonic-gate 	 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or
15840Sstevel@tonic-gate 	 * MATCH_IRE_ILL is set.
15850Sstevel@tonic-gate 	 */
15860Sstevel@tonic-gate 	if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) &&
15870Sstevel@tonic-gate 	    (ipif == NULL))
15880Sstevel@tonic-gate 		return (NULL);
15890Sstevel@tonic-gate 
15900Sstevel@tonic-gate 	/*
15910Sstevel@tonic-gate 	 * might be asking for a cache lookup,
15920Sstevel@tonic-gate 	 * This is not best way to lookup cache,
15930Sstevel@tonic-gate 	 * user should call ire_cache_lookup directly.
15940Sstevel@tonic-gate 	 *
15950Sstevel@tonic-gate 	 * If MATCH_IRE_TYPE was set, first lookup in the cache table and then
15960Sstevel@tonic-gate 	 * in the forwarding table, if the applicable type flags were set.
15970Sstevel@tonic-gate 	 */
15980Sstevel@tonic-gate 	if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_CACHETABLE) != 0) {
15990Sstevel@tonic-gate 		ire = ire_ctable_lookup_v6(addr, gateway, type, ipif, zoneid,
16003448Sdh155122 		    tsl, flags, ipst);
16010Sstevel@tonic-gate 		if (ire != NULL)
16020Sstevel@tonic-gate 			return (ire);
16030Sstevel@tonic-gate 	}
16040Sstevel@tonic-gate 	if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_FORWARDTABLE) != 0) {
16050Sstevel@tonic-gate 		ire = ire_ftable_lookup_v6(addr, mask, gateway, type, ipif,
16063448Sdh155122 		    pire, zoneid, 0, tsl, flags, ipst);
16070Sstevel@tonic-gate 	}
16080Sstevel@tonic-gate 	return (ire);
16090Sstevel@tonic-gate }
16100Sstevel@tonic-gate 
16110Sstevel@tonic-gate /*
16120Sstevel@tonic-gate  * Lookup a route in forwarding table.
16130Sstevel@tonic-gate  * specific lookup is indicated by passing the
16140Sstevel@tonic-gate  * required parameters and indicating the
16150Sstevel@tonic-gate  * match required in flag field.
16160Sstevel@tonic-gate  *
16170Sstevel@tonic-gate  * Looking for default route can be done in three ways
16180Sstevel@tonic-gate  * 1) pass mask as ipv6_all_zeros and set MATCH_IRE_MASK in flags field
16190Sstevel@tonic-gate  *    along with other matches.
16200Sstevel@tonic-gate  * 2) pass type as IRE_DEFAULT and set MATCH_IRE_TYPE in flags
16210Sstevel@tonic-gate  *    field along with other matches.
16220Sstevel@tonic-gate  * 3) if the destination and mask are passed as zeros.
16230Sstevel@tonic-gate  *
16240Sstevel@tonic-gate  * A request to return a default route if no route
16250Sstevel@tonic-gate  * is found, can be specified by setting MATCH_IRE_DEFAULT
16260Sstevel@tonic-gate  * in flags.
16270Sstevel@tonic-gate  *
16280Sstevel@tonic-gate  * It does not support recursion more than one level. It
16290Sstevel@tonic-gate  * will do recursive lookup only when the lookup maps to
16300Sstevel@tonic-gate  * a prefix or default route and MATCH_IRE_RECURSIVE flag is passed.
16310Sstevel@tonic-gate  *
16320Sstevel@tonic-gate  * If the routing table is setup to allow more than one level
16330Sstevel@tonic-gate  * of recursion, the cleaning up cache table will not work resulting
16340Sstevel@tonic-gate  * in invalid routing.
16350Sstevel@tonic-gate  *
16360Sstevel@tonic-gate  * Supports link-local addresses by following the ipif/ill when recursing.
16370Sstevel@tonic-gate  *
16380Sstevel@tonic-gate  * NOTE : When this function returns NULL, pire has already been released.
16390Sstevel@tonic-gate  *	  pire is valid only when this function successfully returns an
16400Sstevel@tonic-gate  *	  ire.
16410Sstevel@tonic-gate  */
16420Sstevel@tonic-gate ire_t *
16430Sstevel@tonic-gate ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask,
16441676Sjpk     const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire,
16453448Sdh155122     zoneid_t zoneid, uint32_t ihandle, const ts_label_t *tsl, int flags,
16463448Sdh155122     ip_stack_t *ipst)
16470Sstevel@tonic-gate {
16480Sstevel@tonic-gate 	irb_t *irb_ptr;
16490Sstevel@tonic-gate 	ire_t	*rire;
16500Sstevel@tonic-gate 	ire_t *ire = NULL;
16510Sstevel@tonic-gate 	ire_t	*saved_ire;
16520Sstevel@tonic-gate 	nce_t	*nce;
16530Sstevel@tonic-gate 	int i;
16540Sstevel@tonic-gate 	in6_addr_t gw_addr_v6;
16550Sstevel@tonic-gate 
16560Sstevel@tonic-gate 	ASSERT(addr != NULL);
16570Sstevel@tonic-gate 	ASSERT((!(flags & MATCH_IRE_MASK)) || mask != NULL);
16580Sstevel@tonic-gate 	ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL);
16590Sstevel@tonic-gate 	ASSERT(ipif == NULL || ipif->ipif_isv6);
16600Sstevel@tonic-gate 	ASSERT(!(flags & MATCH_IRE_WQ));
16610Sstevel@tonic-gate 
16620Sstevel@tonic-gate 	/*
16630Sstevel@tonic-gate 	 * When we return NULL from this function, we should make
16640Sstevel@tonic-gate 	 * sure that *pire is NULL so that the callers will not
16650Sstevel@tonic-gate 	 * wrongly REFRELE the pire.
16660Sstevel@tonic-gate 	 */
16670Sstevel@tonic-gate 	if (pire != NULL)
16680Sstevel@tonic-gate 		*pire = NULL;
16690Sstevel@tonic-gate 	/*
16700Sstevel@tonic-gate 	 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or
16710Sstevel@tonic-gate 	 * MATCH_IRE_ILL is set.
16720Sstevel@tonic-gate 	 */
16730Sstevel@tonic-gate 	if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) &&
16740Sstevel@tonic-gate 	    (ipif == NULL))
16750Sstevel@tonic-gate 		return (NULL);
16760Sstevel@tonic-gate 
16770Sstevel@tonic-gate 	/*
16780Sstevel@tonic-gate 	 * If the mask is known, the lookup
16790Sstevel@tonic-gate 	 * is simple, if the mask is not known
16800Sstevel@tonic-gate 	 * we need to search.
16810Sstevel@tonic-gate 	 */
16820Sstevel@tonic-gate 	if (flags & MATCH_IRE_MASK) {
16830Sstevel@tonic-gate 		uint_t masklen;
16840Sstevel@tonic-gate 
16850Sstevel@tonic-gate 		masklen = ip_mask_to_plen_v6(mask);
16863448Sdh155122 		if (ipst->ips_ip_forwarding_table_v6[masklen] == NULL)
16870Sstevel@tonic-gate 			return (NULL);
16883448Sdh155122 		irb_ptr = &(ipst->ips_ip_forwarding_table_v6[masklen][
16893448Sdh155122 		    IRE_ADDR_MASK_HASH_V6(*addr, *mask,
16903448Sdh155122 			ipst->ips_ip6_ftable_hash_size)]);
16910Sstevel@tonic-gate 		rw_enter(&irb_ptr->irb_lock, RW_READER);
16920Sstevel@tonic-gate 		for (ire = irb_ptr->irb_ire; ire != NULL;
16930Sstevel@tonic-gate 		    ire = ire->ire_next) {
16940Sstevel@tonic-gate 			if (ire->ire_marks & IRE_MARK_CONDEMNED)
16950Sstevel@tonic-gate 				continue;
16960Sstevel@tonic-gate 			if (ire_match_args_v6(ire, addr, mask, gateway, type,
16971676Sjpk 			    ipif, zoneid, ihandle, tsl, flags))
16980Sstevel@tonic-gate 				goto found_ire;
16990Sstevel@tonic-gate 		}
17000Sstevel@tonic-gate 		rw_exit(&irb_ptr->irb_lock);
17010Sstevel@tonic-gate 	} else {
17020Sstevel@tonic-gate 		/*
17030Sstevel@tonic-gate 		 * In this case we don't know the mask, we need to
17040Sstevel@tonic-gate 		 * search the table assuming different mask sizes.
17050Sstevel@tonic-gate 		 * we start with 128 bit mask, we don't allow default here.
17060Sstevel@tonic-gate 		 */
17070Sstevel@tonic-gate 		for (i = (IP6_MASK_TABLE_SIZE - 1); i > 0; i--) {
17080Sstevel@tonic-gate 			in6_addr_t tmpmask;
17090Sstevel@tonic-gate 
17103448Sdh155122 			if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL)
17110Sstevel@tonic-gate 				continue;
17120Sstevel@tonic-gate 			(void) ip_plen_to_mask_v6(i, &tmpmask);
17133448Sdh155122 			irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][
17140Sstevel@tonic-gate 			    IRE_ADDR_MASK_HASH_V6(*addr, tmpmask,
17153448Sdh155122 			    ipst->ips_ip6_ftable_hash_size)];
17160Sstevel@tonic-gate 			rw_enter(&irb_ptr->irb_lock, RW_READER);
17170Sstevel@tonic-gate 			for (ire = irb_ptr->irb_ire; ire != NULL;
17180Sstevel@tonic-gate 			    ire = ire->ire_next) {
17190Sstevel@tonic-gate 				if (ire->ire_marks & IRE_MARK_CONDEMNED)
17200Sstevel@tonic-gate 					continue;
17210Sstevel@tonic-gate 				if (ire_match_args_v6(ire, addr,
17220Sstevel@tonic-gate 				    &ire->ire_mask_v6, gateway, type, ipif,
17231676Sjpk 				    zoneid, ihandle, tsl, flags))
17240Sstevel@tonic-gate 					goto found_ire;
17250Sstevel@tonic-gate 			}
17260Sstevel@tonic-gate 			rw_exit(&irb_ptr->irb_lock);
17270Sstevel@tonic-gate 		}
17280Sstevel@tonic-gate 	}
17290Sstevel@tonic-gate 
17300Sstevel@tonic-gate 	/*
17310Sstevel@tonic-gate 	 * We come here if no route has yet been found.
17320Sstevel@tonic-gate 	 *
17330Sstevel@tonic-gate 	 * Handle the case where default route is
17340Sstevel@tonic-gate 	 * requested by specifying type as one of the possible
17350Sstevel@tonic-gate 	 * types for that can have a zero mask (IRE_DEFAULT and IRE_INTERFACE).
17360Sstevel@tonic-gate 	 *
17370Sstevel@tonic-gate 	 * If MATCH_IRE_MASK is specified, then the appropriate default route
17380Sstevel@tonic-gate 	 * would have been found above if it exists so it isn't looked up here.
17390Sstevel@tonic-gate 	 * If MATCH_IRE_DEFAULT was also specified, then a default route will be
17400Sstevel@tonic-gate 	 * searched for later.
17410Sstevel@tonic-gate 	 */
17420Sstevel@tonic-gate 	if ((flags & (MATCH_IRE_TYPE | MATCH_IRE_MASK)) == MATCH_IRE_TYPE &&
17430Sstevel@tonic-gate 	    (type & (IRE_DEFAULT | IRE_INTERFACE))) {
17443448Sdh155122 		if (ipst->ips_ip_forwarding_table_v6[0] != NULL) {
17450Sstevel@tonic-gate 			/* addr & mask is zero for defaults */
17463448Sdh155122 			irb_ptr = &ipst->ips_ip_forwarding_table_v6[0][
17470Sstevel@tonic-gate 			    IRE_ADDR_HASH_V6(ipv6_all_zeros,
17483448Sdh155122 			    ipst->ips_ip6_ftable_hash_size)];
17490Sstevel@tonic-gate 			rw_enter(&irb_ptr->irb_lock, RW_READER);
17500Sstevel@tonic-gate 			for (ire = irb_ptr->irb_ire; ire != NULL;
17510Sstevel@tonic-gate 			    ire = ire->ire_next) {
17520Sstevel@tonic-gate 
17530Sstevel@tonic-gate 				if (ire->ire_marks & IRE_MARK_CONDEMNED)
17540Sstevel@tonic-gate 					continue;
17550Sstevel@tonic-gate 
17560Sstevel@tonic-gate 				if (ire_match_args_v6(ire, addr,
17570Sstevel@tonic-gate 				    &ipv6_all_zeros, gateway, type, ipif,
17581676Sjpk 				    zoneid, ihandle, tsl, flags))
17590Sstevel@tonic-gate 					goto found_ire;
17600Sstevel@tonic-gate 			}
17610Sstevel@tonic-gate 			rw_exit(&irb_ptr->irb_lock);
17620Sstevel@tonic-gate 		}
17630Sstevel@tonic-gate 	}
17640Sstevel@tonic-gate 	/*
17650Sstevel@tonic-gate 	 * We come here only if no route is found.
17660Sstevel@tonic-gate 	 * see if the default route can be used which is allowed
17670Sstevel@tonic-gate 	 * only if the default matching criteria is specified.
17680Sstevel@tonic-gate 	 * The ipv6_ire_default_count tracks the number of IRE_DEFAULT
17690Sstevel@tonic-gate 	 * entries. However, the ip_forwarding_table_v6[0] also contains
17700Sstevel@tonic-gate 	 * interface routes thus the count can be zero.
17710Sstevel@tonic-gate 	 */
17720Sstevel@tonic-gate 	saved_ire = NULL;
17730Sstevel@tonic-gate 	if ((flags & (MATCH_IRE_DEFAULT | MATCH_IRE_MASK)) ==
17740Sstevel@tonic-gate 	    MATCH_IRE_DEFAULT) {
17750Sstevel@tonic-gate 		ire_t	*ire_origin;
17760Sstevel@tonic-gate 		uint_t	g_index;
17770Sstevel@tonic-gate 		uint_t	index;
17780Sstevel@tonic-gate 
17793448Sdh155122 		if (ipst->ips_ip_forwarding_table_v6[0] == NULL)
17800Sstevel@tonic-gate 			return (NULL);
17813448Sdh155122 		irb_ptr = &(ipst->ips_ip_forwarding_table_v6[0])[0];
17820Sstevel@tonic-gate 
17830Sstevel@tonic-gate 		/*
17840Sstevel@tonic-gate 		 * Keep a tab on the bucket while looking the IRE_DEFAULT
17850Sstevel@tonic-gate 		 * entries. We need to keep track of a particular IRE
17860Sstevel@tonic-gate 		 * (ire_origin) so this ensures that it will not be unlinked
17870Sstevel@tonic-gate 		 * from the hash list during the recursive lookup below.
17880Sstevel@tonic-gate 		 */
17890Sstevel@tonic-gate 		IRB_REFHOLD(irb_ptr);
17900Sstevel@tonic-gate 		ire = irb_ptr->irb_ire;
17910Sstevel@tonic-gate 		if (ire == NULL) {
17920Sstevel@tonic-gate 			IRB_REFRELE(irb_ptr);
17930Sstevel@tonic-gate 			return (NULL);
17940Sstevel@tonic-gate 		}
17950Sstevel@tonic-gate 
17960Sstevel@tonic-gate 		/*
17970Sstevel@tonic-gate 		 * Get the index first, since it can be changed by other
17980Sstevel@tonic-gate 		 * threads. Then get to the right default route skipping
17990Sstevel@tonic-gate 		 * default interface routes if any. As we hold a reference on
18000Sstevel@tonic-gate 		 * the IRE bucket, ipv6_ire_default_count can only increase so
18010Sstevel@tonic-gate 		 * we can't reach the end of the hash list unexpectedly.
18020Sstevel@tonic-gate 		 */
18033448Sdh155122 		if (ipst->ips_ipv6_ire_default_count != 0) {
18043448Sdh155122 			g_index = ipst->ips_ipv6_ire_default_index++;
18053448Sdh155122 			index = g_index % ipst->ips_ipv6_ire_default_count;
18060Sstevel@tonic-gate 			while (index != 0) {
18070Sstevel@tonic-gate 				if (!(ire->ire_type & IRE_INTERFACE))
18080Sstevel@tonic-gate 					index--;
18090Sstevel@tonic-gate 				ire = ire->ire_next;
18100Sstevel@tonic-gate 			}
18110Sstevel@tonic-gate 			ASSERT(ire != NULL);
18120Sstevel@tonic-gate 		} else {
18130Sstevel@tonic-gate 			/*
18140Sstevel@tonic-gate 			 * No default route, so we only have default interface
18150Sstevel@tonic-gate 			 * routes: don't enter the first loop.
18160Sstevel@tonic-gate 			 */
18170Sstevel@tonic-gate 			ire = NULL;
18180Sstevel@tonic-gate 		}
18190Sstevel@tonic-gate 
18200Sstevel@tonic-gate 		/*
18210Sstevel@tonic-gate 		 * Round-robin the default routers list looking for a neighbor
18220Sstevel@tonic-gate 		 * that matches the passed in parameters and is reachable.  If
18230Sstevel@tonic-gate 		 * none found, just return a route from the default router list
18240Sstevel@tonic-gate 		 * if it exists. If we can't find a default route (IRE_DEFAULT),
18250Sstevel@tonic-gate 		 * look for interface default routes.
18260Sstevel@tonic-gate 		 * We start with the ire we found above and we walk the hash
18270Sstevel@tonic-gate 		 * list until we're back where we started, see
18280Sstevel@tonic-gate 		 * ire_get_next_default_ire(). It doesn't matter if default
18290Sstevel@tonic-gate 		 * routes are added or deleted by other threads - we know this
18300Sstevel@tonic-gate 		 * ire will stay in the list because we hold a reference on the
18310Sstevel@tonic-gate 		 * ire bucket.
18320Sstevel@tonic-gate 		 * NB: if we only have interface default routes, ire is NULL so
18330Sstevel@tonic-gate 		 * we don't even enter this loop (see above).
18340Sstevel@tonic-gate 		 */
18350Sstevel@tonic-gate 		ire_origin = ire;
18360Sstevel@tonic-gate 		for (; ire != NULL;
18370Sstevel@tonic-gate 		    ire = ire_get_next_default_ire(ire, ire_origin)) {
18380Sstevel@tonic-gate 
18390Sstevel@tonic-gate 			if (ire_match_args_v6(ire, addr,
18400Sstevel@tonic-gate 			    &ipv6_all_zeros, gateway, type, ipif,
18411676Sjpk 			    zoneid, ihandle, tsl, flags)) {
18420Sstevel@tonic-gate 				int match_flags;
18430Sstevel@tonic-gate 
18440Sstevel@tonic-gate 				/*
18450Sstevel@tonic-gate 				 * We have something to work with.
18460Sstevel@tonic-gate 				 * If we can find a resolved/reachable
18470Sstevel@tonic-gate 				 * entry, we will use this. Otherwise
18480Sstevel@tonic-gate 				 * we'll try to find an entry that has
18490Sstevel@tonic-gate 				 * a resolved cache entry. We will fallback
18500Sstevel@tonic-gate 				 * on this if we don't find anything else.
18510Sstevel@tonic-gate 				 */
18520Sstevel@tonic-gate 				if (saved_ire == NULL)
18530Sstevel@tonic-gate 					saved_ire = ire;
18540Sstevel@tonic-gate 				mutex_enter(&ire->ire_lock);
18550Sstevel@tonic-gate 				gw_addr_v6 = ire->ire_gateway_addr_v6;
18560Sstevel@tonic-gate 				mutex_exit(&ire->ire_lock);
18571676Sjpk 				match_flags = MATCH_IRE_ILL_GROUP |
18581676Sjpk 				    MATCH_IRE_SECATTR;
18590Sstevel@tonic-gate 				rire = ire_ctable_lookup_v6(&gw_addr_v6, NULL,
18603448Sdh155122 				    0, ire->ire_ipif, zoneid, tsl, match_flags,
18613448Sdh155122 				    ipst);
18620Sstevel@tonic-gate 				if (rire != NULL) {
18630Sstevel@tonic-gate 					nce = rire->ire_nce;
18640Sstevel@tonic-gate 					if (nce != NULL &&
18650Sstevel@tonic-gate 					    NCE_ISREACHABLE(nce) &&
18660Sstevel@tonic-gate 					    nce->nce_flags & NCE_F_ISROUTER) {
18670Sstevel@tonic-gate 						ire_refrele(rire);
18680Sstevel@tonic-gate 						IRE_REFHOLD(ire);
18690Sstevel@tonic-gate 						IRB_REFRELE(irb_ptr);
18700Sstevel@tonic-gate 						goto found_ire_held;
18710Sstevel@tonic-gate 					} else if (nce != NULL &&
18720Sstevel@tonic-gate 					    !(nce->nce_flags &
18730Sstevel@tonic-gate 					    NCE_F_ISROUTER)) {
18740Sstevel@tonic-gate 						/*
18750Sstevel@tonic-gate 						 * Make sure we don't use
18760Sstevel@tonic-gate 						 * this ire
18770Sstevel@tonic-gate 						 */
18780Sstevel@tonic-gate 						if (saved_ire == ire)
18790Sstevel@tonic-gate 							saved_ire = NULL;
18800Sstevel@tonic-gate 					}
18810Sstevel@tonic-gate 					ire_refrele(rire);
18823448Sdh155122 				} else if (ipst->
18833448Sdh155122 				    ips_ipv6_ire_default_count > 1 &&
1884*4333Snordmark 				    zoneid != GLOBAL_ZONEID) {
18850Sstevel@tonic-gate 					/*
18860Sstevel@tonic-gate 					 * When we're in a local zone, we're
18870Sstevel@tonic-gate 					 * only interested in default routers
18880Sstevel@tonic-gate 					 * that are reachable through ipifs
18890Sstevel@tonic-gate 					 * within our zone.
18900Sstevel@tonic-gate 					 * The potentially expensive call to
18910Sstevel@tonic-gate 					 * ire_route_lookup_v6() is avoided when
18920Sstevel@tonic-gate 					 * we have only one default route.
18930Sstevel@tonic-gate 					 */
1894*4333Snordmark 					int ire_match_flags = MATCH_IRE_TYPE |
1895*4333Snordmark 					    MATCH_IRE_SECATTR;
1896*4333Snordmark 
1897*4333Snordmark 					if (ire->ire_ipif != NULL) {
1898*4333Snordmark 						ire_match_flags |=
1899*4333Snordmark 						    MATCH_IRE_ILL_GROUP;
1900*4333Snordmark 					}
19010Sstevel@tonic-gate 					rire = ire_route_lookup_v6(&gw_addr_v6,
19022733Snordmark 					    NULL, NULL, IRE_INTERFACE,
19032733Snordmark 					    ire->ire_ipif, NULL,
1904*4333Snordmark 					    zoneid, tsl, ire_match_flags, ipst);
19050Sstevel@tonic-gate 					if (rire != NULL) {
19060Sstevel@tonic-gate 						ire_refrele(rire);
19070Sstevel@tonic-gate 						saved_ire = ire;
19080Sstevel@tonic-gate 					} else if (saved_ire == ire) {
19090Sstevel@tonic-gate 						/*
19100Sstevel@tonic-gate 						 * Make sure we don't use
19110Sstevel@tonic-gate 						 * this ire
19120Sstevel@tonic-gate 						 */
19130Sstevel@tonic-gate 						saved_ire = NULL;
19140Sstevel@tonic-gate 					}
19150Sstevel@tonic-gate 				}
19160Sstevel@tonic-gate 			}
19170Sstevel@tonic-gate 		}
19180Sstevel@tonic-gate 		if (saved_ire != NULL) {
19190Sstevel@tonic-gate 			ire = saved_ire;
19200Sstevel@tonic-gate 			IRE_REFHOLD(ire);
19210Sstevel@tonic-gate 			IRB_REFRELE(irb_ptr);
19220Sstevel@tonic-gate 			goto found_ire_held;
19230Sstevel@tonic-gate 		} else {
19240Sstevel@tonic-gate 			/*
19250Sstevel@tonic-gate 			 * Look for an interface default route matching the
19260Sstevel@tonic-gate 			 * args passed in. No round robin here. Just pick
19270Sstevel@tonic-gate 			 * the right one.
19280Sstevel@tonic-gate 			 */
19290Sstevel@tonic-gate 			for (ire = irb_ptr->irb_ire; ire != NULL;
19300Sstevel@tonic-gate 			    ire = ire->ire_next) {
19310Sstevel@tonic-gate 
19320Sstevel@tonic-gate 				if (!(ire->ire_type & IRE_INTERFACE))
19330Sstevel@tonic-gate 					continue;
19340Sstevel@tonic-gate 
19350Sstevel@tonic-gate 				if (ire->ire_marks & IRE_MARK_CONDEMNED)
19360Sstevel@tonic-gate 					continue;
19370Sstevel@tonic-gate 
19380Sstevel@tonic-gate 				if (ire_match_args_v6(ire, addr,
19390Sstevel@tonic-gate 				    &ipv6_all_zeros, gateway, type, ipif,
19401676Sjpk 				    zoneid, ihandle, tsl, flags)) {
19410Sstevel@tonic-gate 					IRE_REFHOLD(ire);
19420Sstevel@tonic-gate 					IRB_REFRELE(irb_ptr);
19430Sstevel@tonic-gate 					goto found_ire_held;
19440Sstevel@tonic-gate 				}
19450Sstevel@tonic-gate 			}
19460Sstevel@tonic-gate 			IRB_REFRELE(irb_ptr);
19470Sstevel@tonic-gate 		}
19480Sstevel@tonic-gate 	}
19490Sstevel@tonic-gate 	ASSERT(ire == NULL);
19500Sstevel@tonic-gate 	ip1dbg(("ire_ftable_lookup_v6: returning NULL ire"));
19510Sstevel@tonic-gate 	return (NULL);
19520Sstevel@tonic-gate found_ire:
19530Sstevel@tonic-gate 	ASSERT((ire->ire_marks & IRE_MARK_CONDEMNED) == 0);
19540Sstevel@tonic-gate 	IRE_REFHOLD(ire);
19550Sstevel@tonic-gate 	rw_exit(&irb_ptr->irb_lock);
19560Sstevel@tonic-gate 
19570Sstevel@tonic-gate found_ire_held:
19580Sstevel@tonic-gate 	if ((flags & MATCH_IRE_RJ_BHOLE) &&
19590Sstevel@tonic-gate 	    (ire->ire_flags & (RTF_BLACKHOLE | RTF_REJECT))) {
19600Sstevel@tonic-gate 		return (ire);
19610Sstevel@tonic-gate 	}
19620Sstevel@tonic-gate 	/*
19630Sstevel@tonic-gate 	 * At this point, IRE that was found must be an IRE_FORWARDTABLE
19640Sstevel@tonic-gate 	 * or IRE_CACHETABLE type.  If this is a recursive lookup and an
19650Sstevel@tonic-gate 	 * IRE_INTERFACE type was found, return that.  If it was some other
19660Sstevel@tonic-gate 	 * IRE_FORWARDTABLE type of IRE (one of the prefix types), then it
19670Sstevel@tonic-gate 	 * is necessary to fill in the  parent IRE pointed to by pire, and
19680Sstevel@tonic-gate 	 * then lookup the gateway address of  the parent.  For backwards
19690Sstevel@tonic-gate 	 * compatibility, if this lookup returns an
19700Sstevel@tonic-gate 	 * IRE other than a IRE_CACHETABLE or IRE_INTERFACE, then one more level
19710Sstevel@tonic-gate 	 * of lookup is done.
19720Sstevel@tonic-gate 	 */
19730Sstevel@tonic-gate 	if (flags & MATCH_IRE_RECURSIVE) {
19741676Sjpk 		const ipif_t *gw_ipif;
19750Sstevel@tonic-gate 		int match_flags = MATCH_IRE_DSTONLY;
19760Sstevel@tonic-gate 
19770Sstevel@tonic-gate 		if (ire->ire_type & IRE_INTERFACE)
19780Sstevel@tonic-gate 			return (ire);
19790Sstevel@tonic-gate 		if (pire != NULL)
19800Sstevel@tonic-gate 			*pire = ire;
19810Sstevel@tonic-gate 		/*
19820Sstevel@tonic-gate 		 * If we can't find an IRE_INTERFACE or the caller has not
19830Sstevel@tonic-gate 		 * asked for pire, we need to REFRELE the saved_ire.
19840Sstevel@tonic-gate 		 */
19850Sstevel@tonic-gate 		saved_ire = ire;
19860Sstevel@tonic-gate 
19870Sstevel@tonic-gate 		/*
19880Sstevel@tonic-gate 		 * Currently MATCH_IRE_ILL is never used with
19890Sstevel@tonic-gate 		 * (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT) while
19900Sstevel@tonic-gate 		 * sending out packets as MATCH_IRE_ILL is used only
19910Sstevel@tonic-gate 		 * for communicating with on-link hosts. We can't assert
19920Sstevel@tonic-gate 		 * that here as RTM_GET calls this function with
19930Sstevel@tonic-gate 		 * MATCH_IRE_ILL | MATCH_IRE_DEFAULT | MATCH_IRE_RECURSIVE.
19940Sstevel@tonic-gate 		 * We have already used the MATCH_IRE_ILL in determining
19950Sstevel@tonic-gate 		 * the right prefix route at this point. To match the
19960Sstevel@tonic-gate 		 * behavior of how we locate routes while sending out
19970Sstevel@tonic-gate 		 * packets, we don't want to use MATCH_IRE_ILL below
19980Sstevel@tonic-gate 		 * while locating the interface route.
19990Sstevel@tonic-gate 		 */
20000Sstevel@tonic-gate 		if (ire->ire_ipif != NULL)
20010Sstevel@tonic-gate 			match_flags |= MATCH_IRE_ILL_GROUP;
20020Sstevel@tonic-gate 
20030Sstevel@tonic-gate 		mutex_enter(&ire->ire_lock);
20040Sstevel@tonic-gate 		gw_addr_v6 = ire->ire_gateway_addr_v6;
20050Sstevel@tonic-gate 		mutex_exit(&ire->ire_lock);
20060Sstevel@tonic-gate 
20070Sstevel@tonic-gate 		ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, 0,
20083448Sdh155122 		    ire->ire_ipif, NULL, zoneid, tsl, match_flags, ipst);
20090Sstevel@tonic-gate 		if (ire == NULL) {
20100Sstevel@tonic-gate 			/*
20110Sstevel@tonic-gate 			 * In this case we have to deal with the
20120Sstevel@tonic-gate 			 * MATCH_IRE_PARENT flag, which means the
20130Sstevel@tonic-gate 			 * parent has to be returned if ire is NULL.
20140Sstevel@tonic-gate 			 * The aim of this is to have (at least) a starting
20150Sstevel@tonic-gate 			 * ire when we want to look at all of the ires in a
20160Sstevel@tonic-gate 			 * bucket aimed at a single destination (as is the
20170Sstevel@tonic-gate 			 * case in ip_newroute_v6 for the RTF_MULTIRT
20180Sstevel@tonic-gate 			 * flagged routes).
20190Sstevel@tonic-gate 			 */
20200Sstevel@tonic-gate 			if (flags & MATCH_IRE_PARENT) {
20210Sstevel@tonic-gate 				if (pire != NULL) {
20220Sstevel@tonic-gate 					/*
20230Sstevel@tonic-gate 					 * Need an extra REFHOLD, if the
20240Sstevel@tonic-gate 					 * parent ire is returned via both
20250Sstevel@tonic-gate 					 * ire and pire.
20260Sstevel@tonic-gate 					 */
20270Sstevel@tonic-gate 					IRE_REFHOLD(saved_ire);
20280Sstevel@tonic-gate 				}
20290Sstevel@tonic-gate 				ire = saved_ire;
20300Sstevel@tonic-gate 			} else {
20310Sstevel@tonic-gate 				ire_refrele(saved_ire);
20320Sstevel@tonic-gate 				if (pire != NULL)
20330Sstevel@tonic-gate 					*pire = NULL;
20340Sstevel@tonic-gate 			}
20350Sstevel@tonic-gate 			return (ire);
20360Sstevel@tonic-gate 		}
20370Sstevel@tonic-gate 		if (ire->ire_type & (IRE_CACHETABLE | IRE_INTERFACE)) {
20380Sstevel@tonic-gate 			/*
20390Sstevel@tonic-gate 			 * If the caller did not ask for pire, release
20400Sstevel@tonic-gate 			 * it now.
20410Sstevel@tonic-gate 			 */
20420Sstevel@tonic-gate 			if (pire == NULL) {
20430Sstevel@tonic-gate 				ire_refrele(saved_ire);
20440Sstevel@tonic-gate 			}
20450Sstevel@tonic-gate 			return (ire);
20460Sstevel@tonic-gate 		}
20470Sstevel@tonic-gate 		match_flags |= MATCH_IRE_TYPE;
20480Sstevel@tonic-gate 		mutex_enter(&ire->ire_lock);
20490Sstevel@tonic-gate 		gw_addr_v6 = ire->ire_gateway_addr_v6;
20500Sstevel@tonic-gate 		mutex_exit(&ire->ire_lock);
20510Sstevel@tonic-gate 		gw_ipif = ire->ire_ipif;
20520Sstevel@tonic-gate 		ire_refrele(ire);
20530Sstevel@tonic-gate 		ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL,
20540Sstevel@tonic-gate 		    (IRE_CACHETABLE | IRE_INTERFACE), gw_ipif, NULL, zoneid,
20553448Sdh155122 		    NULL, match_flags, ipst);
20560Sstevel@tonic-gate 		if (ire == NULL) {
20570Sstevel@tonic-gate 			/*
20580Sstevel@tonic-gate 			 * In this case we have to deal with the
20590Sstevel@tonic-gate 			 * MATCH_IRE_PARENT flag, which means the
20600Sstevel@tonic-gate 			 * parent has to be returned if ire is NULL.
20610Sstevel@tonic-gate 			 * The aim of this is to have (at least) a starting
20620Sstevel@tonic-gate 			 * ire when we want to look at all of the ires in a
20630Sstevel@tonic-gate 			 * bucket aimed at a single destination (as is the
20640Sstevel@tonic-gate 			 * case in ip_newroute_v6 for the RTF_MULTIRT
20650Sstevel@tonic-gate 			 * flagged routes).
20660Sstevel@tonic-gate 			 */
20670Sstevel@tonic-gate 			if (flags & MATCH_IRE_PARENT) {
20680Sstevel@tonic-gate 				if (pire != NULL) {
20690Sstevel@tonic-gate 					/*
20700Sstevel@tonic-gate 					 * Need an extra REFHOLD, if the
20710Sstevel@tonic-gate 					 * parent ire is returned via both
20720Sstevel@tonic-gate 					 * ire and pire.
20730Sstevel@tonic-gate 					 */
20740Sstevel@tonic-gate 					IRE_REFHOLD(saved_ire);
20750Sstevel@tonic-gate 				}
20760Sstevel@tonic-gate 				ire = saved_ire;
20770Sstevel@tonic-gate 			} else {
20780Sstevel@tonic-gate 				ire_refrele(saved_ire);
20790Sstevel@tonic-gate 				if (pire != NULL)
20800Sstevel@tonic-gate 					*pire = NULL;
20810Sstevel@tonic-gate 			}
20820Sstevel@tonic-gate 			return (ire);
20830Sstevel@tonic-gate 		} else if (pire == NULL) {
20840Sstevel@tonic-gate 			/*
20850Sstevel@tonic-gate 			 * If the caller did not ask for pire, release
20860Sstevel@tonic-gate 			 * it now.
20870Sstevel@tonic-gate 			 */
20880Sstevel@tonic-gate 			ire_refrele(saved_ire);
20890Sstevel@tonic-gate 		}
20900Sstevel@tonic-gate 		return (ire);
20910Sstevel@tonic-gate 	}
20920Sstevel@tonic-gate 
20930Sstevel@tonic-gate 	ASSERT(pire == NULL || *pire == NULL);
20940Sstevel@tonic-gate 	return (ire);
20950Sstevel@tonic-gate }
20960Sstevel@tonic-gate 
20970Sstevel@tonic-gate /*
20981676Sjpk  * Delete the IRE cache for the gateway and all IRE caches whose
20991676Sjpk  * ire_gateway_addr_v6 points to this gateway, and allow them to
21001676Sjpk  * be created on demand by ip_newroute_v6.
21011676Sjpk  */
21021676Sjpk void
21033448Sdh155122 ire_clookup_delete_cache_gw_v6(const in6_addr_t *addr, zoneid_t zoneid,
21043448Sdh155122 	ip_stack_t *ipst)
21051676Sjpk {
21061676Sjpk 	irb_t *irb;
21071676Sjpk 	ire_t *ire;
21081676Sjpk 
21093448Sdh155122 	irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr,
21103448Sdh155122 	    ipst->ips_ip6_cache_table_size)];
21111676Sjpk 	IRB_REFHOLD(irb);
21121676Sjpk 	for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
21131676Sjpk 		if (ire->ire_marks & IRE_MARK_CONDEMNED)
21141676Sjpk 			continue;
21151676Sjpk 
21161676Sjpk 		ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones));
21171676Sjpk 		if (ire_match_args_v6(ire, addr, &ire->ire_mask_v6, 0,
21181676Sjpk 		    IRE_CACHE, NULL, zoneid, 0, NULL, MATCH_IRE_TYPE)) {
21191676Sjpk 			ire_delete(ire);
21201676Sjpk 		}
21211676Sjpk 	}
21221676Sjpk 	IRB_REFRELE(irb);
21231676Sjpk 
21243448Sdh155122 	ire_walk_v6(ire_delete_cache_gw_v6, (char *)addr, zoneid, ipst);
21251676Sjpk }
21261676Sjpk 
21271676Sjpk /*
21280Sstevel@tonic-gate  * Looks up cache table for a route.
21290Sstevel@tonic-gate  * specific lookup can be indicated by
21300Sstevel@tonic-gate  * passing the MATCH_* flags and the
21310Sstevel@tonic-gate  * necessary parameters.
21320Sstevel@tonic-gate  */
21330Sstevel@tonic-gate ire_t *
21340Sstevel@tonic-gate ire_ctable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *gateway,
21351676Sjpk     int type, const ipif_t *ipif, zoneid_t zoneid, const ts_label_t *tsl,
21363448Sdh155122     int flags, ip_stack_t *ipst)
21370Sstevel@tonic-gate {
21380Sstevel@tonic-gate 	ire_t *ire;
21390Sstevel@tonic-gate 	irb_t *irb_ptr;
21400Sstevel@tonic-gate 	ASSERT(addr != NULL);
21410Sstevel@tonic-gate 	ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL);
21420Sstevel@tonic-gate 
21430Sstevel@tonic-gate 	/*
21440Sstevel@tonic-gate 	 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or
21450Sstevel@tonic-gate 	 * MATCH_IRE_ILL is set.
21460Sstevel@tonic-gate 	 */
21470Sstevel@tonic-gate 	if ((flags & (MATCH_IRE_SRC |  MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) &&
21480Sstevel@tonic-gate 	    (ipif == NULL))
21490Sstevel@tonic-gate 		return (NULL);
21500Sstevel@tonic-gate 
21513448Sdh155122 	irb_ptr = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr,
21523448Sdh155122 				    ipst->ips_ip6_cache_table_size)];
21530Sstevel@tonic-gate 	rw_enter(&irb_ptr->irb_lock, RW_READER);
21540Sstevel@tonic-gate 	for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) {
21550Sstevel@tonic-gate 		if (ire->ire_marks & IRE_MARK_CONDEMNED)
21560Sstevel@tonic-gate 			continue;
21570Sstevel@tonic-gate 
21580Sstevel@tonic-gate 		ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones));
21590Sstevel@tonic-gate 		if (ire_match_args_v6(ire, addr, &ire->ire_mask_v6, gateway,
21601676Sjpk 		    type, ipif, zoneid, 0, tsl, flags)) {
21610Sstevel@tonic-gate 			IRE_REFHOLD(ire);
21620Sstevel@tonic-gate 			rw_exit(&irb_ptr->irb_lock);
21630Sstevel@tonic-gate 			return (ire);
21640Sstevel@tonic-gate 		}
21650Sstevel@tonic-gate 	}
21660Sstevel@tonic-gate 	rw_exit(&irb_ptr->irb_lock);
21670Sstevel@tonic-gate 	return (NULL);
21680Sstevel@tonic-gate }
21690Sstevel@tonic-gate 
21700Sstevel@tonic-gate /*
21710Sstevel@tonic-gate  * Lookup cache. Don't return IRE_MARK_HIDDEN entries. Callers
21720Sstevel@tonic-gate  * should use ire_ctable_lookup with MATCH_IRE_MARK_HIDDEN to get
21730Sstevel@tonic-gate  * to the hidden ones.
21742733Snordmark  *
21752733Snordmark  * In general the zoneid has to match (where ALL_ZONES match all of them).
21762733Snordmark  * But for IRE_LOCAL we also need to handle the case where L2 should
21772733Snordmark  * conceptually loop back the packet. This is necessary since neither
21782733Snordmark  * Ethernet drivers nor Ethernet hardware loops back packets sent to their
21792733Snordmark  * own MAC address. This loopback is needed when the normal
21802733Snordmark  * routes (ignoring IREs with different zoneids) would send out the packet on
21812733Snordmark  * the same ill (or ill group) as the ill with which this IRE_LOCAL is
21822733Snordmark  * associated.
21832733Snordmark  *
21842733Snordmark  * Earlier versions of this code always matched an IRE_LOCAL independently of
21852733Snordmark  * the zoneid. We preserve that earlier behavior when
21862733Snordmark  * ip_restrict_interzone_loopback is turned off.
21870Sstevel@tonic-gate  */
21880Sstevel@tonic-gate ire_t *
21891676Sjpk ire_cache_lookup_v6(const in6_addr_t *addr, zoneid_t zoneid,
21903448Sdh155122     const ts_label_t *tsl, ip_stack_t *ipst)
21910Sstevel@tonic-gate {
21920Sstevel@tonic-gate 	irb_t *irb_ptr;
21930Sstevel@tonic-gate 	ire_t *ire;
21940Sstevel@tonic-gate 
21953448Sdh155122 	irb_ptr = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr,
21963448Sdh155122 	    ipst->ips_ip6_cache_table_size)];
21970Sstevel@tonic-gate 	rw_enter(&irb_ptr->irb_lock, RW_READER);
21980Sstevel@tonic-gate 	for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) {
21990Sstevel@tonic-gate 		if (ire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN))
22000Sstevel@tonic-gate 			continue;
22010Sstevel@tonic-gate 		if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, addr)) {
22021676Sjpk 			/*
22031676Sjpk 			 * Finally, check if the security policy has any
22041676Sjpk 			 * restriction on using this route for the specified
22051676Sjpk 			 * message.
22061676Sjpk 			 */
22071676Sjpk 			if (tsl != NULL &&
22081676Sjpk 			    ire->ire_gw_secattr != NULL &&
22091676Sjpk 			    tsol_ire_match_gwattr(ire, tsl) != 0) {
22101676Sjpk 				continue;
22111676Sjpk 			}
22121676Sjpk 
22130Sstevel@tonic-gate 			if (zoneid == ALL_ZONES || ire->ire_zoneid == zoneid ||
22142733Snordmark 			    ire->ire_zoneid == ALL_ZONES) {
22152733Snordmark 				IRE_REFHOLD(ire);
22162733Snordmark 				rw_exit(&irb_ptr->irb_lock);
22172733Snordmark 				return (ire);
22182733Snordmark 			}
22192733Snordmark 
22202733Snordmark 			if (ire->ire_type == IRE_LOCAL) {
22213448Sdh155122 				if (ipst->ips_ip_restrict_interzone_loopback &&
22222733Snordmark 				    !ire_local_ok_across_zones(ire, zoneid,
22233448Sdh155122 				    (void *)addr, tsl, ipst))
22242733Snordmark 					continue;
22252733Snordmark 
22260Sstevel@tonic-gate 				IRE_REFHOLD(ire);
22270Sstevel@tonic-gate 				rw_exit(&irb_ptr->irb_lock);
22280Sstevel@tonic-gate 				return (ire);
22290Sstevel@tonic-gate 			}
22300Sstevel@tonic-gate 		}
22310Sstevel@tonic-gate 	}
22320Sstevel@tonic-gate 	rw_exit(&irb_ptr->irb_lock);
22330Sstevel@tonic-gate 	return (NULL);
22340Sstevel@tonic-gate }
22350Sstevel@tonic-gate 
22360Sstevel@tonic-gate /*
22370Sstevel@tonic-gate  * Locate the interface ire that is tied to the cache ire 'cire' via
22380Sstevel@tonic-gate  * cire->ire_ihandle.
22390Sstevel@tonic-gate  *
22400Sstevel@tonic-gate  * We are trying to create the cache ire for an onlink destn. or
22410Sstevel@tonic-gate  * gateway in 'cire'. We are called from ire_add_v6() in the IRE_IF_RESOLVER
22420Sstevel@tonic-gate  * case for xresolv interfaces, after the ire has come back from
22430Sstevel@tonic-gate  * an external resolver.
22440Sstevel@tonic-gate  */
22450Sstevel@tonic-gate static ire_t *
22460Sstevel@tonic-gate ire_ihandle_lookup_onlink_v6(ire_t *cire)
22470Sstevel@tonic-gate {
22480Sstevel@tonic-gate 	ire_t	*ire;
22490Sstevel@tonic-gate 	int	match_flags;
22500Sstevel@tonic-gate 	int	i;
22510Sstevel@tonic-gate 	int	j;
22520Sstevel@tonic-gate 	irb_t	*irb_ptr;
22533448Sdh155122 	ip_stack_t	*ipst = cire->ire_ipst;
22540Sstevel@tonic-gate 
22550Sstevel@tonic-gate 	ASSERT(cire != NULL);
22560Sstevel@tonic-gate 
22570Sstevel@tonic-gate 	match_flags =  MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK;
22580Sstevel@tonic-gate 	/*
22590Sstevel@tonic-gate 	 * We know that the mask of the interface ire equals cire->ire_cmask.
22600Sstevel@tonic-gate 	 * (When ip_newroute_v6() created 'cire' for an on-link destn.
22610Sstevel@tonic-gate 	 * it set its cmask from the interface ire's mask)
22620Sstevel@tonic-gate 	 */
22630Sstevel@tonic-gate 	ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6,
22640Sstevel@tonic-gate 	    NULL, IRE_INTERFACE, NULL, NULL, ALL_ZONES, cire->ire_ihandle,
22653448Sdh155122 	    NULL, match_flags, ipst);
22660Sstevel@tonic-gate 	if (ire != NULL)
22670Sstevel@tonic-gate 		return (ire);
22680Sstevel@tonic-gate 	/*
22690Sstevel@tonic-gate 	 * If we didn't find an interface ire above, we can't declare failure.
22700Sstevel@tonic-gate 	 * For backwards compatibility, we need to support prefix routes
22710Sstevel@tonic-gate 	 * pointing to next hop gateways that are not on-link.
22720Sstevel@tonic-gate 	 *
22730Sstevel@tonic-gate 	 * In the resolver/noresolver case, ip_newroute_v6() thinks
22740Sstevel@tonic-gate 	 * it is creating the cache ire for an onlink destination in 'cire'.
22750Sstevel@tonic-gate 	 * But 'cire' is not actually onlink, because ire_ftable_lookup_v6()
22760Sstevel@tonic-gate 	 * cheated it, by doing ire_route_lookup_v6() twice and returning an
22770Sstevel@tonic-gate 	 * interface ire.
22780Sstevel@tonic-gate 	 *
22790Sstevel@tonic-gate 	 * Eg. default	-	gw1			(line 1)
22800Sstevel@tonic-gate 	 *	gw1	-	gw2			(line 2)
22810Sstevel@tonic-gate 	 *	gw2	-	hme0			(line 3)
22820Sstevel@tonic-gate 	 *
22830Sstevel@tonic-gate 	 * In the above example, ip_newroute_v6() tried to create the cache ire
22840Sstevel@tonic-gate 	 * 'cire' for gw1, based on the interface route in line 3. The
22850Sstevel@tonic-gate 	 * ire_ftable_lookup_v6() above fails, because there is
22860Sstevel@tonic-gate 	 * no interface route to reach gw1. (it is gw2). We fall thru below.
22870Sstevel@tonic-gate 	 *
22880Sstevel@tonic-gate 	 * Do a brute force search based on the ihandle in a subset of the
22890Sstevel@tonic-gate 	 * forwarding tables, corresponding to cire->ire_cmask_v6. Otherwise
22900Sstevel@tonic-gate 	 * things become very complex, since we don't have 'pire' in this
22910Sstevel@tonic-gate 	 * case. (Also note that this method is not possible in the offlink
22920Sstevel@tonic-gate 	 * case because we don't know the mask)
22930Sstevel@tonic-gate 	 */
22940Sstevel@tonic-gate 	i = ip_mask_to_plen_v6(&cire->ire_cmask_v6);
22953448Sdh155122 	if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL)
22960Sstevel@tonic-gate 		return (NULL);
22973448Sdh155122 	for (j = 0; j < ipst->ips_ip6_ftable_hash_size; j++) {
22983448Sdh155122 		irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][j];
22990Sstevel@tonic-gate 		rw_enter(&irb_ptr->irb_lock, RW_READER);
23000Sstevel@tonic-gate 		for (ire = irb_ptr->irb_ire; ire != NULL;
23010Sstevel@tonic-gate 		    ire = ire->ire_next) {
23020Sstevel@tonic-gate 			if (ire->ire_marks & IRE_MARK_CONDEMNED)
23030Sstevel@tonic-gate 				continue;
23040Sstevel@tonic-gate 			if ((ire->ire_type & IRE_INTERFACE) &&
23050Sstevel@tonic-gate 			    (ire->ire_ihandle == cire->ire_ihandle)) {
23060Sstevel@tonic-gate 				IRE_REFHOLD(ire);
23070Sstevel@tonic-gate 				rw_exit(&irb_ptr->irb_lock);
23080Sstevel@tonic-gate 				return (ire);
23090Sstevel@tonic-gate 			}
23100Sstevel@tonic-gate 		}
23110Sstevel@tonic-gate 		rw_exit(&irb_ptr->irb_lock);
23120Sstevel@tonic-gate 	}
23130Sstevel@tonic-gate 	return (NULL);
23140Sstevel@tonic-gate }
23150Sstevel@tonic-gate 
23160Sstevel@tonic-gate 
23170Sstevel@tonic-gate /*
23180Sstevel@tonic-gate  * Locate the interface ire that is tied to the cache ire 'cire' via
23190Sstevel@tonic-gate  * cire->ire_ihandle.
23200Sstevel@tonic-gate  *
23210Sstevel@tonic-gate  * We are trying to create the cache ire for an offlink destn based
23220Sstevel@tonic-gate  * on the cache ire of the gateway in 'cire'. 'pire' is the prefix ire
23230Sstevel@tonic-gate  * as found by ip_newroute_v6(). We are called from ip_newroute_v6() in
23240Sstevel@tonic-gate  * the IRE_CACHE case.
23250Sstevel@tonic-gate  */
23260Sstevel@tonic-gate ire_t *
23270Sstevel@tonic-gate ire_ihandle_lookup_offlink_v6(ire_t *cire, ire_t *pire)
23280Sstevel@tonic-gate {
23290Sstevel@tonic-gate 	ire_t	*ire;
23300Sstevel@tonic-gate 	int	match_flags;
23310Sstevel@tonic-gate 	in6_addr_t	gw_addr;
23320Sstevel@tonic-gate 	ipif_t		*gw_ipif;
23333448Sdh155122 	ip_stack_t	*ipst = cire->ire_ipst;
23340Sstevel@tonic-gate 
23350Sstevel@tonic-gate 	ASSERT(cire != NULL && pire != NULL);
23360Sstevel@tonic-gate 
23370Sstevel@tonic-gate 	match_flags =  MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK;
23380Sstevel@tonic-gate 	/*
23390Sstevel@tonic-gate 	 * ip_newroute_v6 calls ire_ftable_lookup with MATCH_IRE_ILL only
23400Sstevel@tonic-gate 	 * for on-link hosts. We should never be here for onlink.
23410Sstevel@tonic-gate 	 * Thus, use MATCH_IRE_ILL_GROUP.
23420Sstevel@tonic-gate 	 */
23430Sstevel@tonic-gate 	if (pire->ire_ipif != NULL)
23440Sstevel@tonic-gate 		match_flags |= MATCH_IRE_ILL_GROUP;
23450Sstevel@tonic-gate 	/*
23460Sstevel@tonic-gate 	 * We know that the mask of the interface ire equals cire->ire_cmask.
23470Sstevel@tonic-gate 	 * (When ip_newroute_v6() created 'cire' for an on-link destn. it set
23480Sstevel@tonic-gate 	 * its cmask from the interface ire's mask)
23490Sstevel@tonic-gate 	 */
23500Sstevel@tonic-gate 	ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 0,
23510Sstevel@tonic-gate 	    IRE_INTERFACE, pire->ire_ipif, NULL, ALL_ZONES, cire->ire_ihandle,
23523448Sdh155122 	    NULL, match_flags, ipst);
23530Sstevel@tonic-gate 	if (ire != NULL)
23540Sstevel@tonic-gate 		return (ire);
23550Sstevel@tonic-gate 	/*
23560Sstevel@tonic-gate 	 * If we didn't find an interface ire above, we can't declare failure.
23570Sstevel@tonic-gate 	 * For backwards compatibility, we need to support prefix routes
23580Sstevel@tonic-gate 	 * pointing to next hop gateways that are not on-link.
23590Sstevel@tonic-gate 	 *
23600Sstevel@tonic-gate 	 * Assume we are trying to ping some offlink destn, and we have the
23610Sstevel@tonic-gate 	 * routing table below.
23620Sstevel@tonic-gate 	 *
23630Sstevel@tonic-gate 	 * Eg.	default	- gw1		<--- pire	(line 1)
23640Sstevel@tonic-gate 	 *	gw1	- gw2				(line 2)
23650Sstevel@tonic-gate 	 *	gw2	- hme0				(line 3)
23660Sstevel@tonic-gate 	 *
23670Sstevel@tonic-gate 	 * If we already have a cache ire for gw1 in 'cire', the
23680Sstevel@tonic-gate 	 * ire_ftable_lookup_v6 above would have failed, since there is no
23690Sstevel@tonic-gate 	 * interface ire to reach gw1. We will fallthru below.
23700Sstevel@tonic-gate 	 *
23710Sstevel@tonic-gate 	 * Here we duplicate the steps that ire_ftable_lookup_v6() did in
23720Sstevel@tonic-gate 	 * getting 'cire' from 'pire', in the MATCH_IRE_RECURSIVE case.
23730Sstevel@tonic-gate 	 * The differences are the following
23740Sstevel@tonic-gate 	 * i.   We want the interface ire only, so we call
23750Sstevel@tonic-gate 	 *	ire_ftable_lookup_v6() instead of ire_route_lookup_v6()
23760Sstevel@tonic-gate 	 * ii.  We look for only prefix routes in the 1st call below.
23770Sstevel@tonic-gate 	 * ii.  We want to match on the ihandle in the 2nd call below.
23780Sstevel@tonic-gate 	 */
23790Sstevel@tonic-gate 	match_flags =  MATCH_IRE_TYPE;
23800Sstevel@tonic-gate 	if (pire->ire_ipif != NULL)
23810Sstevel@tonic-gate 		match_flags |= MATCH_IRE_ILL_GROUP;
23820Sstevel@tonic-gate 
23830Sstevel@tonic-gate 	mutex_enter(&pire->ire_lock);
23840Sstevel@tonic-gate 	gw_addr = pire->ire_gateway_addr_v6;
23850Sstevel@tonic-gate 	mutex_exit(&pire->ire_lock);
23860Sstevel@tonic-gate 	ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_OFFSUBNET,
23873448Sdh155122 	    pire->ire_ipif, NULL, ALL_ZONES, 0, NULL, match_flags, ipst);
23880Sstevel@tonic-gate 	if (ire == NULL)
23890Sstevel@tonic-gate 		return (NULL);
23900Sstevel@tonic-gate 	/*
23910Sstevel@tonic-gate 	 * At this point 'ire' corresponds to the entry shown in line 2.
23920Sstevel@tonic-gate 	 * gw_addr is 'gw2' in the example above.
23930Sstevel@tonic-gate 	 */
23940Sstevel@tonic-gate 	mutex_enter(&ire->ire_lock);
23950Sstevel@tonic-gate 	gw_addr = ire->ire_gateway_addr_v6;
23960Sstevel@tonic-gate 	mutex_exit(&ire->ire_lock);
23970Sstevel@tonic-gate 	gw_ipif = ire->ire_ipif;
23980Sstevel@tonic-gate 	ire_refrele(ire);
23990Sstevel@tonic-gate 
24000Sstevel@tonic-gate 	match_flags |= MATCH_IRE_IHANDLE;
24010Sstevel@tonic-gate 	ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_INTERFACE,
24021676Sjpk 	    gw_ipif, NULL, ALL_ZONES, cire->ire_ihandle,
24033448Sdh155122 	    NULL, match_flags, ipst);
24040Sstevel@tonic-gate 	return (ire);
24050Sstevel@tonic-gate }
24060Sstevel@tonic-gate 
24070Sstevel@tonic-gate /*
24080Sstevel@tonic-gate  * Return the IRE_LOOPBACK, IRE_IF_RESOLVER or IRE_IF_NORESOLVER
24090Sstevel@tonic-gate  * ire associated with the specified ipif.
24100Sstevel@tonic-gate  *
24110Sstevel@tonic-gate  * This might occasionally be called when IPIF_UP is not set since
24120Sstevel@tonic-gate  * the IPV6_MULTICAST_IF as well as creating interface routes
24130Sstevel@tonic-gate  * allows specifying a down ipif (ipif_lookup* match ipifs that are down).
24140Sstevel@tonic-gate  *
24150Sstevel@tonic-gate  * Note that if IPIF_NOLOCAL, IPIF_NOXMIT, or IPIF_DEPRECATED is set on
24160Sstevel@tonic-gate  * the ipif this routine might return NULL.
24170Sstevel@tonic-gate  * (Sometimes called as writer though not required by this function.)
24180Sstevel@tonic-gate  */
24190Sstevel@tonic-gate ire_t *
24201676Sjpk ipif_to_ire_v6(const ipif_t *ipif)
24210Sstevel@tonic-gate {
24220Sstevel@tonic-gate 	ire_t	*ire;
24233448Sdh155122 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
24240Sstevel@tonic-gate 
24250Sstevel@tonic-gate 	ASSERT(ipif->ipif_isv6);
24260Sstevel@tonic-gate 	if (ipif->ipif_ire_type == IRE_LOOPBACK) {
24270Sstevel@tonic-gate 		ire = ire_ctable_lookup_v6(&ipif->ipif_v6lcl_addr, NULL,
24281676Sjpk 		    IRE_LOOPBACK, ipif, ALL_ZONES, NULL,
24293448Sdh155122 		    (MATCH_IRE_TYPE | MATCH_IRE_IPIF), ipst);
24300Sstevel@tonic-gate 	} else if (ipif->ipif_flags & IPIF_POINTOPOINT) {
24310Sstevel@tonic-gate 		/* In this case we need to lookup destination address. */
24320Sstevel@tonic-gate 		ire = ire_ftable_lookup_v6(&ipif->ipif_v6pp_dst_addr,
24330Sstevel@tonic-gate 		    &ipv6_all_ones, NULL, IRE_INTERFACE, ipif, NULL, ALL_ZONES,
24341676Sjpk 		    0, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF |
24353448Sdh155122 		    MATCH_IRE_MASK), ipst);
24360Sstevel@tonic-gate 	} else {
24370Sstevel@tonic-gate 		ire = ire_ftable_lookup_v6(&ipif->ipif_v6subnet,
24380Sstevel@tonic-gate 		    &ipif->ipif_v6net_mask, NULL, IRE_INTERFACE, ipif, NULL,
24391676Sjpk 		    ALL_ZONES, 0, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF |
24403448Sdh155122 		    MATCH_IRE_MASK), ipst);
24410Sstevel@tonic-gate 	}
24420Sstevel@tonic-gate 	return (ire);
24430Sstevel@tonic-gate }
24440Sstevel@tonic-gate 
24450Sstevel@tonic-gate /*
24460Sstevel@tonic-gate  * Return B_TRUE if a multirt route is resolvable
24470Sstevel@tonic-gate  * (or if no route is resolved yet), B_FALSE otherwise.
24480Sstevel@tonic-gate  * This only works in the global zone.
24490Sstevel@tonic-gate  */
24500Sstevel@tonic-gate boolean_t
24513448Sdh155122 ire_multirt_need_resolve_v6(const in6_addr_t *v6dstp, const ts_label_t *tsl,
24523448Sdh155122     ip_stack_t *ipst)
24530Sstevel@tonic-gate {
24540Sstevel@tonic-gate 	ire_t	*first_fire;
24550Sstevel@tonic-gate 	ire_t	*first_cire;
24560Sstevel@tonic-gate 	ire_t	*fire;
24570Sstevel@tonic-gate 	ire_t	*cire;
24580Sstevel@tonic-gate 	irb_t	*firb;
24590Sstevel@tonic-gate 	irb_t	*cirb;
24600Sstevel@tonic-gate 	int	unres_cnt = 0;
24610Sstevel@tonic-gate 	boolean_t resolvable = B_FALSE;
24620Sstevel@tonic-gate 
24630Sstevel@tonic-gate 	/* Retrieve the first IRE_HOST that matches the destination */
24640Sstevel@tonic-gate 	first_fire = ire_ftable_lookup_v6(v6dstp, &ipv6_all_ones, 0, IRE_HOST,
24651676Sjpk 	    NULL, NULL, ALL_ZONES, 0, tsl, MATCH_IRE_MASK | MATCH_IRE_TYPE |
24663448Sdh155122 	    MATCH_IRE_SECATTR, ipst);
24670Sstevel@tonic-gate 
24680Sstevel@tonic-gate 	/* No route at all */
24690Sstevel@tonic-gate 	if (first_fire == NULL) {
24700Sstevel@tonic-gate 		return (B_TRUE);
24710Sstevel@tonic-gate 	}
24720Sstevel@tonic-gate 
24730Sstevel@tonic-gate 	firb = first_fire->ire_bucket;
24740Sstevel@tonic-gate 	ASSERT(firb);
24750Sstevel@tonic-gate 
24760Sstevel@tonic-gate 	/* Retrieve the first IRE_CACHE ire for that destination. */
24773448Sdh155122 	first_cire = ire_cache_lookup_v6(v6dstp, GLOBAL_ZONEID, tsl, ipst);
24780Sstevel@tonic-gate 
24790Sstevel@tonic-gate 	/* No resolved route. */
24800Sstevel@tonic-gate 	if (first_cire == NULL) {
24810Sstevel@tonic-gate 		ire_refrele(first_fire);
24820Sstevel@tonic-gate 		return (B_TRUE);
24830Sstevel@tonic-gate 	}
24840Sstevel@tonic-gate 
24850Sstevel@tonic-gate 	/* At least one route is resolved. */
24860Sstevel@tonic-gate 
24870Sstevel@tonic-gate 	cirb = first_cire->ire_bucket;
24880Sstevel@tonic-gate 	ASSERT(cirb);
24890Sstevel@tonic-gate 
24900Sstevel@tonic-gate 	/* Count the number of routes to that dest that are declared. */
24910Sstevel@tonic-gate 	IRB_REFHOLD(firb);
24920Sstevel@tonic-gate 	for (fire = first_fire; fire != NULL; fire = fire->ire_next) {
24930Sstevel@tonic-gate 		if (!(fire->ire_flags & RTF_MULTIRT))
24940Sstevel@tonic-gate 			continue;
24950Sstevel@tonic-gate 		if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, v6dstp))
24960Sstevel@tonic-gate 			continue;
24970Sstevel@tonic-gate 		unres_cnt++;
24980Sstevel@tonic-gate 	}
24990Sstevel@tonic-gate 	IRB_REFRELE(firb);
25000Sstevel@tonic-gate 
25010Sstevel@tonic-gate 
25020Sstevel@tonic-gate 	/* Then subtract the number of routes to that dst that are resolved */
25030Sstevel@tonic-gate 	IRB_REFHOLD(cirb);
25040Sstevel@tonic-gate 	for (cire = first_cire; cire != NULL; cire = cire->ire_next) {
25050Sstevel@tonic-gate 	    if (!(cire->ire_flags & RTF_MULTIRT))
25060Sstevel@tonic-gate 		continue;
25070Sstevel@tonic-gate 	    if (!IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, v6dstp))
25080Sstevel@tonic-gate 		continue;
25090Sstevel@tonic-gate 	    if (cire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN))
25100Sstevel@tonic-gate 		continue;
25110Sstevel@tonic-gate 	    unres_cnt--;
25120Sstevel@tonic-gate 	}
25130Sstevel@tonic-gate 	IRB_REFRELE(cirb);
25140Sstevel@tonic-gate 
25150Sstevel@tonic-gate 	/* At least one route is unresolved; search for a resolvable route. */
25160Sstevel@tonic-gate 	if (unres_cnt > 0)
25170Sstevel@tonic-gate 		resolvable = ire_multirt_lookup_v6(&first_cire, &first_fire,
25183448Sdh155122 		    MULTIRT_USESTAMP|MULTIRT_CACHEGW, tsl, ipst);
25190Sstevel@tonic-gate 
25200Sstevel@tonic-gate 	if (first_fire)
25210Sstevel@tonic-gate 		ire_refrele(first_fire);
25220Sstevel@tonic-gate 
25230Sstevel@tonic-gate 	if (first_cire)
25240Sstevel@tonic-gate 		ire_refrele(first_cire);
25250Sstevel@tonic-gate 
25260Sstevel@tonic-gate 	return (resolvable);
25270Sstevel@tonic-gate }
25280Sstevel@tonic-gate 
25290Sstevel@tonic-gate 
25300Sstevel@tonic-gate /*
25310Sstevel@tonic-gate  * Return B_TRUE and update *ire_arg and *fire_arg
25320Sstevel@tonic-gate  * if at least one resolvable route is found.
25330Sstevel@tonic-gate  * Return B_FALSE otherwise (all routes are resolved or
25340Sstevel@tonic-gate  * the remaining unresolved routes are all unresolvable).
25350Sstevel@tonic-gate  * This only works in the global zone.
25360Sstevel@tonic-gate  */
25370Sstevel@tonic-gate boolean_t
25381676Sjpk ire_multirt_lookup_v6(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags,
25393448Sdh155122     const ts_label_t *tsl, ip_stack_t *ipst)
25400Sstevel@tonic-gate {
25410Sstevel@tonic-gate 	clock_t	delta;
25420Sstevel@tonic-gate 	ire_t	*best_fire = NULL;
25430Sstevel@tonic-gate 	ire_t	*best_cire = NULL;
25440Sstevel@tonic-gate 	ire_t	*first_fire;
25450Sstevel@tonic-gate 	ire_t	*first_cire;
25460Sstevel@tonic-gate 	ire_t	*fire;
25470Sstevel@tonic-gate 	ire_t	*cire;
25480Sstevel@tonic-gate 	irb_t	*firb = NULL;
25490Sstevel@tonic-gate 	irb_t	*cirb = NULL;
25500Sstevel@tonic-gate 	ire_t	*gw_ire;
25510Sstevel@tonic-gate 	boolean_t	already_resolved;
25520Sstevel@tonic-gate 	boolean_t	res;
25530Sstevel@tonic-gate 	in6_addr_t	v6dst;
25540Sstevel@tonic-gate 	in6_addr_t	v6gw;
25550Sstevel@tonic-gate 
25560Sstevel@tonic-gate 	ip2dbg(("ire_multirt_lookup_v6: *ire_arg %p, *fire_arg %p, "
25570Sstevel@tonic-gate 	    "flags %04x\n", (void *)*ire_arg, (void *)*fire_arg, flags));
25580Sstevel@tonic-gate 
25590Sstevel@tonic-gate 	ASSERT(ire_arg);
25600Sstevel@tonic-gate 	ASSERT(fire_arg);
25610Sstevel@tonic-gate 
25620Sstevel@tonic-gate 	/* Not an IRE_HOST ire; give up. */
25630Sstevel@tonic-gate 	if ((*fire_arg == NULL) ||
25640Sstevel@tonic-gate 	    ((*fire_arg)->ire_type != IRE_HOST)) {
25650Sstevel@tonic-gate 		return (B_FALSE);
25660Sstevel@tonic-gate 	}
25670Sstevel@tonic-gate 
25680Sstevel@tonic-gate 	/* This is the first IRE_HOST ire for that destination. */
25690Sstevel@tonic-gate 	first_fire = *fire_arg;
25700Sstevel@tonic-gate 	firb = first_fire->ire_bucket;
25710Sstevel@tonic-gate 	ASSERT(firb);
25720Sstevel@tonic-gate 
25730Sstevel@tonic-gate 	mutex_enter(&first_fire->ire_lock);
25740Sstevel@tonic-gate 	v6dst = first_fire->ire_addr_v6;
25750Sstevel@tonic-gate 	mutex_exit(&first_fire->ire_lock);
25760Sstevel@tonic-gate 
25770Sstevel@tonic-gate 	ip2dbg(("ire_multirt_lookup_v6: dst %08x\n",
25780Sstevel@tonic-gate 	    ntohl(V4_PART_OF_V6(v6dst))));
25790Sstevel@tonic-gate 
25800Sstevel@tonic-gate 	/*
25810Sstevel@tonic-gate 	 * Retrieve the first IRE_CACHE ire for that destination;
25820Sstevel@tonic-gate 	 * if we don't find one, no route for that dest is
25830Sstevel@tonic-gate 	 * resolved yet.
25840Sstevel@tonic-gate 	 */
25853448Sdh155122 	first_cire = ire_cache_lookup_v6(&v6dst, GLOBAL_ZONEID, tsl, ipst);
25860Sstevel@tonic-gate 	if (first_cire) {
25870Sstevel@tonic-gate 		cirb = first_cire->ire_bucket;
25880Sstevel@tonic-gate 	}
25890Sstevel@tonic-gate 
25900Sstevel@tonic-gate 	ip2dbg(("ire_multirt_lookup_v6: first_cire %p\n", (void *)first_cire));
25910Sstevel@tonic-gate 
25920Sstevel@tonic-gate 	/*
25930Sstevel@tonic-gate 	 * Search for a resolvable route, giving the top priority
25940Sstevel@tonic-gate 	 * to routes that can be resolved without any call to the resolver.
25950Sstevel@tonic-gate 	 */
25960Sstevel@tonic-gate 	IRB_REFHOLD(firb);
25970Sstevel@tonic-gate 
25980Sstevel@tonic-gate 	if (!IN6_IS_ADDR_MULTICAST(&v6dst)) {
25990Sstevel@tonic-gate 		/*
26000Sstevel@tonic-gate 		 * For all multiroute IRE_HOST ires for that destination,
26010Sstevel@tonic-gate 		 * check if the route via the IRE_HOST's gateway is
26020Sstevel@tonic-gate 		 * resolved yet.
26030Sstevel@tonic-gate 		 */
26040Sstevel@tonic-gate 		for (fire = first_fire; fire != NULL; fire = fire->ire_next) {
26050Sstevel@tonic-gate 
26060Sstevel@tonic-gate 			if (!(fire->ire_flags & RTF_MULTIRT))
26070Sstevel@tonic-gate 				continue;
26080Sstevel@tonic-gate 			if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst))
26090Sstevel@tonic-gate 				continue;
26100Sstevel@tonic-gate 
26111676Sjpk 			if (fire->ire_gw_secattr != NULL &&
26121676Sjpk 			    tsol_ire_match_gwattr(fire, tsl) != 0) {
26131676Sjpk 				continue;
26141676Sjpk 			}
26151676Sjpk 
26160Sstevel@tonic-gate 			mutex_enter(&fire->ire_lock);
26170Sstevel@tonic-gate 			v6gw = fire->ire_gateway_addr_v6;
26180Sstevel@tonic-gate 			mutex_exit(&fire->ire_lock);
26190Sstevel@tonic-gate 
26200Sstevel@tonic-gate 			ip2dbg(("ire_multirt_lookup_v6: fire %p, "
26210Sstevel@tonic-gate 			    "ire_addr %08x, ire_gateway_addr %08x\n",
26220Sstevel@tonic-gate 			    (void *)fire,
26230Sstevel@tonic-gate 			    ntohl(V4_PART_OF_V6(fire->ire_addr_v6)),
26240Sstevel@tonic-gate 			    ntohl(V4_PART_OF_V6(v6gw))));
26250Sstevel@tonic-gate 
26260Sstevel@tonic-gate 			already_resolved = B_FALSE;
26270Sstevel@tonic-gate 
26280Sstevel@tonic-gate 			if (first_cire) {
26290Sstevel@tonic-gate 				ASSERT(cirb);
26300Sstevel@tonic-gate 
26310Sstevel@tonic-gate 				IRB_REFHOLD(cirb);
26320Sstevel@tonic-gate 				/*
26330Sstevel@tonic-gate 				 * For all IRE_CACHE ires for that
26340Sstevel@tonic-gate 				 * destination.
26350Sstevel@tonic-gate 				 */
26360Sstevel@tonic-gate 				for (cire = first_cire;
26370Sstevel@tonic-gate 				    cire != NULL;
26380Sstevel@tonic-gate 				    cire = cire->ire_next) {
26390Sstevel@tonic-gate 
26400Sstevel@tonic-gate 					if (!(cire->ire_flags & RTF_MULTIRT))
26410Sstevel@tonic-gate 						continue;
26420Sstevel@tonic-gate 					if (!IN6_ARE_ADDR_EQUAL(
26430Sstevel@tonic-gate 					    &cire->ire_addr_v6, &v6dst))
26440Sstevel@tonic-gate 						continue;
26450Sstevel@tonic-gate 					if (cire->ire_marks &
26460Sstevel@tonic-gate 					    (IRE_MARK_CONDEMNED|
26470Sstevel@tonic-gate 						IRE_MARK_HIDDEN))
26480Sstevel@tonic-gate 						continue;
26491676Sjpk 
26501676Sjpk 					if (cire->ire_gw_secattr != NULL &&
26511676Sjpk 					    tsol_ire_match_gwattr(cire,
26521676Sjpk 					    tsl) != 0) {
26531676Sjpk 						continue;
26541676Sjpk 					}
26551676Sjpk 
26560Sstevel@tonic-gate 					/*
26570Sstevel@tonic-gate 					 * Check if the IRE_CACHE's gateway
26580Sstevel@tonic-gate 					 * matches the IRE_HOST's gateway.
26590Sstevel@tonic-gate 					 */
26600Sstevel@tonic-gate 					if (IN6_ARE_ADDR_EQUAL(
26610Sstevel@tonic-gate 					    &cire->ire_gateway_addr_v6,
26620Sstevel@tonic-gate 					    &v6gw)) {
26630Sstevel@tonic-gate 						already_resolved = B_TRUE;
26640Sstevel@tonic-gate 						break;
26650Sstevel@tonic-gate 					}
26660Sstevel@tonic-gate 				}
26670Sstevel@tonic-gate 				IRB_REFRELE(cirb);
26680Sstevel@tonic-gate 			}
26690Sstevel@tonic-gate 
26700Sstevel@tonic-gate 			/*
26710Sstevel@tonic-gate 			 * This route is already resolved;
26720Sstevel@tonic-gate 			 * proceed with next one.
26730Sstevel@tonic-gate 			 */
26740Sstevel@tonic-gate 			if (already_resolved) {
26750Sstevel@tonic-gate 				ip2dbg(("ire_multirt_lookup_v6: found cire %p, "
26760Sstevel@tonic-gate 				    "already resolved\n", (void *)cire));
26770Sstevel@tonic-gate 				continue;
26780Sstevel@tonic-gate 			}
26790Sstevel@tonic-gate 
26800Sstevel@tonic-gate 			/*
26810Sstevel@tonic-gate 			 * The route is unresolved; is it actually
26820Sstevel@tonic-gate 			 * resolvable, i.e. is there a cache or a resolver
26830Sstevel@tonic-gate 			 * for the gateway?
26840Sstevel@tonic-gate 			 */
26850Sstevel@tonic-gate 			gw_ire = ire_route_lookup_v6(&v6gw, 0, 0, 0, NULL, NULL,
26861676Sjpk 			    ALL_ZONES, tsl, MATCH_IRE_RECURSIVE |
26873448Sdh155122 			    MATCH_IRE_SECATTR, ipst);
26880Sstevel@tonic-gate 
26890Sstevel@tonic-gate 			ip2dbg(("ire_multirt_lookup_v6: looked up gw_ire %p\n",
26900Sstevel@tonic-gate 			    (void *)gw_ire));
26910Sstevel@tonic-gate 
26920Sstevel@tonic-gate 			/*
26930Sstevel@tonic-gate 			 * This route can be resolved without any call to the
26940Sstevel@tonic-gate 			 * resolver; if the MULTIRT_CACHEGW flag is set,
26950Sstevel@tonic-gate 			 * give the top priority to this ire and exit the
26960Sstevel@tonic-gate 			 * loop.
26970Sstevel@tonic-gate 			 * This occurs when an resolver reply is processed
26980Sstevel@tonic-gate 			 * through ip_wput_nondata()
26990Sstevel@tonic-gate 			 */
27000Sstevel@tonic-gate 			if ((flags & MULTIRT_CACHEGW) &&
27010Sstevel@tonic-gate 			    (gw_ire != NULL) &&
27020Sstevel@tonic-gate 			    (gw_ire->ire_type & IRE_CACHETABLE)) {
27030Sstevel@tonic-gate 				/*
27040Sstevel@tonic-gate 				 * Release the resolver associated to the
27050Sstevel@tonic-gate 				 * previous candidate best ire, if any.
27060Sstevel@tonic-gate 				 */
27070Sstevel@tonic-gate 				if (best_cire) {
27080Sstevel@tonic-gate 					ire_refrele(best_cire);
27090Sstevel@tonic-gate 					ASSERT(best_fire);
27100Sstevel@tonic-gate 				}
27110Sstevel@tonic-gate 
27120Sstevel@tonic-gate 				best_fire = fire;
27130Sstevel@tonic-gate 				best_cire = gw_ire;
27140Sstevel@tonic-gate 
27150Sstevel@tonic-gate 				ip2dbg(("ire_multirt_lookup_v6: found top prio "
27160Sstevel@tonic-gate 				    "best_fire %p, best_cire %p\n",
27170Sstevel@tonic-gate 				    (void *)best_fire, (void *)best_cire));
27180Sstevel@tonic-gate 				break;
27190Sstevel@tonic-gate 			}
27200Sstevel@tonic-gate 
27210Sstevel@tonic-gate 			/*
27220Sstevel@tonic-gate 			 * Compute the time elapsed since our preceding
27230Sstevel@tonic-gate 			 * attempt to  resolve that route.
27240Sstevel@tonic-gate 			 * If the MULTIRT_USESTAMP flag is set, we take that
27250Sstevel@tonic-gate 			 * route into account only if this time interval
27260Sstevel@tonic-gate 			 * exceeds ip_multirt_resolution_interval;
27270Sstevel@tonic-gate 			 * this prevents us from attempting to resolve a
27280Sstevel@tonic-gate 			 * broken route upon each sending of a packet.
27290Sstevel@tonic-gate 			 */
27300Sstevel@tonic-gate 			delta = lbolt - fire->ire_last_used_time;
27310Sstevel@tonic-gate 			delta = TICK_TO_MSEC(delta);
27320Sstevel@tonic-gate 
27330Sstevel@tonic-gate 			res = (boolean_t)
27343448Sdh155122 			    ((delta > ipst->
27353448Sdh155122 				ips_ip_multirt_resolution_interval) ||
27363448Sdh155122 			    (!(flags & MULTIRT_USESTAMP)));
27370Sstevel@tonic-gate 
27380Sstevel@tonic-gate 			ip2dbg(("ire_multirt_lookup_v6: fire %p, delta %lu, "
27390Sstevel@tonic-gate 			    "res %d\n",
27400Sstevel@tonic-gate 			    (void *)fire, delta, res));
27410Sstevel@tonic-gate 
27420Sstevel@tonic-gate 			if (res) {
27430Sstevel@tonic-gate 				/*
27440Sstevel@tonic-gate 				 * A resolver exists for the gateway: save
27450Sstevel@tonic-gate 				 * the current IRE_HOST ire as a candidate
27460Sstevel@tonic-gate 				 * best ire. If we later discover that a
27470Sstevel@tonic-gate 				 * top priority ire exists (i.e. no need to
27480Sstevel@tonic-gate 				 * call the resolver), then this new ire
27490Sstevel@tonic-gate 				 * will be preferred to the current one.
27500Sstevel@tonic-gate 				 */
27510Sstevel@tonic-gate 				if (gw_ire != NULL) {
27520Sstevel@tonic-gate 					if (best_fire == NULL) {
27530Sstevel@tonic-gate 						ASSERT(best_cire == NULL);
27540Sstevel@tonic-gate 
27550Sstevel@tonic-gate 						best_fire = fire;
27560Sstevel@tonic-gate 						best_cire = gw_ire;
27570Sstevel@tonic-gate 
27580Sstevel@tonic-gate 						ip2dbg(("ire_multirt_lookup_v6:"
27590Sstevel@tonic-gate 						    "found candidate "
27600Sstevel@tonic-gate 						    "best_fire %p, "
27610Sstevel@tonic-gate 						    "best_cire %p\n",
27620Sstevel@tonic-gate 						    (void *)best_fire,
27630Sstevel@tonic-gate 						    (void *)best_cire));
27640Sstevel@tonic-gate 
27650Sstevel@tonic-gate 						/*
27660Sstevel@tonic-gate 						 * If MULTIRT_CACHEGW is not
27670Sstevel@tonic-gate 						 * set, we ignore the top
27680Sstevel@tonic-gate 						 * priority ires that can
27690Sstevel@tonic-gate 						 * be resolved without any
27700Sstevel@tonic-gate 						 * call to the resolver;
27710Sstevel@tonic-gate 						 * In that case, there is
27720Sstevel@tonic-gate 						 * actually no need
27730Sstevel@tonic-gate 						 * to continue the loop.
27740Sstevel@tonic-gate 						 */
27750Sstevel@tonic-gate 						if (!(flags &
27760Sstevel@tonic-gate 						    MULTIRT_CACHEGW)) {
27770Sstevel@tonic-gate 							break;
27780Sstevel@tonic-gate 						}
27790Sstevel@tonic-gate 						continue;
27800Sstevel@tonic-gate 					}
27810Sstevel@tonic-gate 				} else {
27820Sstevel@tonic-gate 					/*
27830Sstevel@tonic-gate 					 * No resolver for the gateway: the
27840Sstevel@tonic-gate 					 * route is not resolvable.
27850Sstevel@tonic-gate 					 * If the MULTIRT_SETSTAMP flag is
27860Sstevel@tonic-gate 					 * set, we stamp the IRE_HOST ire,
27870Sstevel@tonic-gate 					 * so we will not select it again
27880Sstevel@tonic-gate 					 * during this resolution interval.
27890Sstevel@tonic-gate 					 */
27900Sstevel@tonic-gate 					if (flags & MULTIRT_SETSTAMP)
27910Sstevel@tonic-gate 						fire->ire_last_used_time =
27920Sstevel@tonic-gate 						    lbolt;
27930Sstevel@tonic-gate 				}
27940Sstevel@tonic-gate 			}
27950Sstevel@tonic-gate 
27960Sstevel@tonic-gate 			if (gw_ire != NULL)
27970Sstevel@tonic-gate 				ire_refrele(gw_ire);
27980Sstevel@tonic-gate 		}
27990Sstevel@tonic-gate 	} else { /* IN6_IS_ADDR_MULTICAST(&v6dst) */
28000Sstevel@tonic-gate 
28010Sstevel@tonic-gate 		for (fire = first_fire;
28020Sstevel@tonic-gate 		    fire != NULL;
28030Sstevel@tonic-gate 		    fire = fire->ire_next) {
28040Sstevel@tonic-gate 
28050Sstevel@tonic-gate 			if (!(fire->ire_flags & RTF_MULTIRT))
28060Sstevel@tonic-gate 				continue;
28070Sstevel@tonic-gate 			if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst))
28080Sstevel@tonic-gate 				continue;
28090Sstevel@tonic-gate 
28101676Sjpk 			if (fire->ire_gw_secattr != NULL &&
28111676Sjpk 			    tsol_ire_match_gwattr(fire, tsl) != 0) {
28121676Sjpk 				continue;
28131676Sjpk 			}
28141676Sjpk 
28150Sstevel@tonic-gate 			already_resolved = B_FALSE;
28160Sstevel@tonic-gate 
28170Sstevel@tonic-gate 			mutex_enter(&fire->ire_lock);
28180Sstevel@tonic-gate 			v6gw = fire->ire_gateway_addr_v6;
28190Sstevel@tonic-gate 			mutex_exit(&fire->ire_lock);
28200Sstevel@tonic-gate 
28210Sstevel@tonic-gate 			gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0,
28221676Sjpk 			    IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, tsl,
28231676Sjpk 			    MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE |
28243448Sdh155122 			    MATCH_IRE_SECATTR, ipst);
28250Sstevel@tonic-gate 
28260Sstevel@tonic-gate 			/* No resolver for the gateway; we skip this ire. */
28270Sstevel@tonic-gate 			if (gw_ire == NULL) {
28280Sstevel@tonic-gate 				continue;
28290Sstevel@tonic-gate 			}
28300Sstevel@tonic-gate 
28310Sstevel@tonic-gate 			if (first_cire) {
28320Sstevel@tonic-gate 
28330Sstevel@tonic-gate 				IRB_REFHOLD(cirb);
28340Sstevel@tonic-gate 				/*
28350Sstevel@tonic-gate 				 * For all IRE_CACHE ires for that
28360Sstevel@tonic-gate 				 * destination.
28370Sstevel@tonic-gate 				 */
28380Sstevel@tonic-gate 				for (cire = first_cire;
28390Sstevel@tonic-gate 				    cire != NULL;
28400Sstevel@tonic-gate 				    cire = cire->ire_next) {
28410Sstevel@tonic-gate 
28420Sstevel@tonic-gate 					if (!(cire->ire_flags & RTF_MULTIRT))
28430Sstevel@tonic-gate 						continue;
28440Sstevel@tonic-gate 					if (!IN6_ARE_ADDR_EQUAL(
28450Sstevel@tonic-gate 					    &cire->ire_addr_v6, &v6dst))
28460Sstevel@tonic-gate 						continue;
28470Sstevel@tonic-gate 					if (cire->ire_marks &
28480Sstevel@tonic-gate 					    (IRE_MARK_CONDEMNED|
28490Sstevel@tonic-gate 						IRE_MARK_HIDDEN))
28500Sstevel@tonic-gate 						continue;
28511676Sjpk 
28521676Sjpk 					if (cire->ire_gw_secattr != NULL &&
28531676Sjpk 					    tsol_ire_match_gwattr(cire,
28541676Sjpk 					    tsl) != 0) {
28551676Sjpk 						continue;
28561676Sjpk 					}
28571676Sjpk 
28580Sstevel@tonic-gate 					/*
28590Sstevel@tonic-gate 					 * Cache entries are linked to the
28600Sstevel@tonic-gate 					 * parent routes using the parent handle
28610Sstevel@tonic-gate 					 * (ire_phandle). If no cache entry has
28620Sstevel@tonic-gate 					 * the same handle as fire, fire is
28630Sstevel@tonic-gate 					 * still unresolved.
28640Sstevel@tonic-gate 					 */
28650Sstevel@tonic-gate 					ASSERT(cire->ire_phandle != 0);
28660Sstevel@tonic-gate 					if (cire->ire_phandle ==
28670Sstevel@tonic-gate 					    fire->ire_phandle) {
28680Sstevel@tonic-gate 						already_resolved = B_TRUE;
28690Sstevel@tonic-gate 						break;
28700Sstevel@tonic-gate 					}
28710Sstevel@tonic-gate 				}
28720Sstevel@tonic-gate 				IRB_REFRELE(cirb);
28730Sstevel@tonic-gate 			}
28740Sstevel@tonic-gate 
28750Sstevel@tonic-gate 			/*
28760Sstevel@tonic-gate 			 * This route is already resolved; proceed with
28770Sstevel@tonic-gate 			 * next one.
28780Sstevel@tonic-gate 			 */
28790Sstevel@tonic-gate 			if (already_resolved) {
28800Sstevel@tonic-gate 				ire_refrele(gw_ire);
28810Sstevel@tonic-gate 				continue;
28820Sstevel@tonic-gate 			}
28830Sstevel@tonic-gate 
28840Sstevel@tonic-gate 			/*
28850Sstevel@tonic-gate 			 * Compute the time elapsed since our preceding
28860Sstevel@tonic-gate 			 * attempt to resolve that route.
28870Sstevel@tonic-gate 			 * If the MULTIRT_USESTAMP flag is set, we take
28880Sstevel@tonic-gate 			 * that route into account only if this time
28890Sstevel@tonic-gate 			 * interval exceeds ip_multirt_resolution_interval;
28900Sstevel@tonic-gate 			 * this prevents us from attempting to resolve a
28910Sstevel@tonic-gate 			 * broken route upon each sending of a packet.
28920Sstevel@tonic-gate 			 */
28930Sstevel@tonic-gate 			delta = lbolt - fire->ire_last_used_time;
28940Sstevel@tonic-gate 			delta = TICK_TO_MSEC(delta);
28950Sstevel@tonic-gate 
28960Sstevel@tonic-gate 			res = (boolean_t)
28973448Sdh155122 			    ((delta > ipst->
28983448Sdh155122 				ips_ip_multirt_resolution_interval) ||
28990Sstevel@tonic-gate 			    (!(flags & MULTIRT_USESTAMP)));
29000Sstevel@tonic-gate 
29010Sstevel@tonic-gate 			ip3dbg(("ire_multirt_lookup_v6: fire %p, delta %lx, "
29020Sstevel@tonic-gate 			    "flags %04x, res %d\n",
29030Sstevel@tonic-gate 			    (void *)fire, delta, flags, res));
29040Sstevel@tonic-gate 
29050Sstevel@tonic-gate 			if (res) {
29060Sstevel@tonic-gate 				if (best_cire) {
29070Sstevel@tonic-gate 					/*
29080Sstevel@tonic-gate 					 * Release the resolver associated
29090Sstevel@tonic-gate 					 * to the preceding candidate best
29100Sstevel@tonic-gate 					 * ire, if any.
29110Sstevel@tonic-gate 					 */
29120Sstevel@tonic-gate 					ire_refrele(best_cire);
29130Sstevel@tonic-gate 					ASSERT(best_fire);
29140Sstevel@tonic-gate 				}
29150Sstevel@tonic-gate 				best_fire = fire;
29160Sstevel@tonic-gate 				best_cire = gw_ire;
29170Sstevel@tonic-gate 				continue;
29180Sstevel@tonic-gate 			}
29190Sstevel@tonic-gate 
29200Sstevel@tonic-gate 			ire_refrele(gw_ire);
29210Sstevel@tonic-gate 		}
29220Sstevel@tonic-gate 	}
29230Sstevel@tonic-gate 
29240Sstevel@tonic-gate 	if (best_fire) {
29250Sstevel@tonic-gate 		IRE_REFHOLD(best_fire);
29260Sstevel@tonic-gate 	}
29270Sstevel@tonic-gate 	IRB_REFRELE(firb);
29280Sstevel@tonic-gate 
29290Sstevel@tonic-gate 	/* Release the first IRE_CACHE we initially looked up, if any. */
29300Sstevel@tonic-gate 	if (first_cire)
29310Sstevel@tonic-gate 		ire_refrele(first_cire);
29320Sstevel@tonic-gate 
29330Sstevel@tonic-gate 	/* Found a resolvable route. */
29340Sstevel@tonic-gate 	if (best_fire) {
29350Sstevel@tonic-gate 		ASSERT(best_cire);
29360Sstevel@tonic-gate 
29370Sstevel@tonic-gate 		if (*fire_arg)
29380Sstevel@tonic-gate 			ire_refrele(*fire_arg);
29390Sstevel@tonic-gate 		if (*ire_arg)
29400Sstevel@tonic-gate 			ire_refrele(*ire_arg);
29410Sstevel@tonic-gate 
29420Sstevel@tonic-gate 		/*
29430Sstevel@tonic-gate 		 * Update the passed arguments with the
29440Sstevel@tonic-gate 		 * resolvable multirt route we found
29450Sstevel@tonic-gate 		 */
29460Sstevel@tonic-gate 		*fire_arg = best_fire;
29470Sstevel@tonic-gate 		*ire_arg = best_cire;
29480Sstevel@tonic-gate 
29490Sstevel@tonic-gate 		ip2dbg(("ire_multirt_lookup_v6: returning B_TRUE, "
29500Sstevel@tonic-gate 		    "*fire_arg %p, *ire_arg %p\n",
29510Sstevel@tonic-gate 		    (void *)best_fire, (void *)best_cire));
29520Sstevel@tonic-gate 
29530Sstevel@tonic-gate 		return (B_TRUE);
29540Sstevel@tonic-gate 	}
29550Sstevel@tonic-gate 
29560Sstevel@tonic-gate 	ASSERT(best_cire == NULL);
29570Sstevel@tonic-gate 
29580Sstevel@tonic-gate 	ip2dbg(("ire_multirt_lookup_v6: returning B_FALSE, *fire_arg %p, "
29590Sstevel@tonic-gate 	    "*ire_arg %p\n",
29600Sstevel@tonic-gate 	    (void *)*fire_arg, (void *)*ire_arg));
29610Sstevel@tonic-gate 
29620Sstevel@tonic-gate 	/* No resolvable route. */
29630Sstevel@tonic-gate 	return (B_FALSE);
29640Sstevel@tonic-gate }
29650Sstevel@tonic-gate 
29660Sstevel@tonic-gate 
29670Sstevel@tonic-gate /*
29680Sstevel@tonic-gate  * Find an IRE_OFFSUBNET IRE entry for the multicast address 'v6dstp'
29690Sstevel@tonic-gate  * that goes through 'ipif'. As a fallback, a route that goes through
29700Sstevel@tonic-gate  * ipif->ipif_ill can be returned.
29710Sstevel@tonic-gate  */
29720Sstevel@tonic-gate ire_t *
29730Sstevel@tonic-gate ipif_lookup_multi_ire_v6(ipif_t *ipif, const in6_addr_t *v6dstp)
29740Sstevel@tonic-gate {
29750Sstevel@tonic-gate 	ire_t	*ire;
29760Sstevel@tonic-gate 	ire_t	*save_ire = NULL;
29770Sstevel@tonic-gate 	ire_t   *gw_ire;
29780Sstevel@tonic-gate 	irb_t   *irb;
29790Sstevel@tonic-gate 	in6_addr_t v6gw;
29800Sstevel@tonic-gate 	int	match_flags = MATCH_IRE_TYPE | MATCH_IRE_ILL;
29813448Sdh155122 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
29820Sstevel@tonic-gate 
29830Sstevel@tonic-gate 	ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, NULL, NULL, ALL_ZONES, 0,
29843448Sdh155122 	    NULL, MATCH_IRE_DEFAULT, ipst);
29850Sstevel@tonic-gate 
29860Sstevel@tonic-gate 	if (ire == NULL)
29870Sstevel@tonic-gate 		return (NULL);
29880Sstevel@tonic-gate 
29890Sstevel@tonic-gate 	irb = ire->ire_bucket;
29900Sstevel@tonic-gate 	ASSERT(irb);
29910Sstevel@tonic-gate 
29920Sstevel@tonic-gate 	IRB_REFHOLD(irb);
29930Sstevel@tonic-gate 	ire_refrele(ire);
29940Sstevel@tonic-gate 	for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
29950Sstevel@tonic-gate 		if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) ||
29961676Sjpk 		    (ipif->ipif_zoneid != ire->ire_zoneid &&
29971676Sjpk 		    ire->ire_zoneid != ALL_ZONES)) {
29980Sstevel@tonic-gate 			continue;
29990Sstevel@tonic-gate 		}
30000Sstevel@tonic-gate 
30010Sstevel@tonic-gate 		switch (ire->ire_type) {
30020Sstevel@tonic-gate 		case IRE_DEFAULT:
30030Sstevel@tonic-gate 		case IRE_PREFIX:
30040Sstevel@tonic-gate 		case IRE_HOST:
30050Sstevel@tonic-gate 			mutex_enter(&ire->ire_lock);
30060Sstevel@tonic-gate 			v6gw = ire->ire_gateway_addr_v6;
30070Sstevel@tonic-gate 			mutex_exit(&ire->ire_lock);
30080Sstevel@tonic-gate 			gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0,
30090Sstevel@tonic-gate 			    IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0,
30103448Sdh155122 			    NULL, match_flags, ipst);
30110Sstevel@tonic-gate 
30120Sstevel@tonic-gate 			if (gw_ire != NULL) {
30130Sstevel@tonic-gate 				if (save_ire != NULL) {
30140Sstevel@tonic-gate 					ire_refrele(save_ire);
30150Sstevel@tonic-gate 				}
30160Sstevel@tonic-gate 				IRE_REFHOLD(ire);
30170Sstevel@tonic-gate 				if (gw_ire->ire_ipif == ipif) {
30180Sstevel@tonic-gate 					ire_refrele(gw_ire);
30190Sstevel@tonic-gate 
30200Sstevel@tonic-gate 					IRB_REFRELE(irb);
30210Sstevel@tonic-gate 					return (ire);
30220Sstevel@tonic-gate 				}
30230Sstevel@tonic-gate 				ire_refrele(gw_ire);
30240Sstevel@tonic-gate 				save_ire = ire;
30250Sstevel@tonic-gate 			}
30260Sstevel@tonic-gate 			break;
30270Sstevel@tonic-gate 		case IRE_IF_NORESOLVER:
30280Sstevel@tonic-gate 		case IRE_IF_RESOLVER:
30290Sstevel@tonic-gate 			if (ire->ire_ipif == ipif) {
30300Sstevel@tonic-gate 				if (save_ire != NULL) {
30310Sstevel@tonic-gate 					ire_refrele(save_ire);
30320Sstevel@tonic-gate 				}
30330Sstevel@tonic-gate 				IRE_REFHOLD(ire);
30340Sstevel@tonic-gate 
30350Sstevel@tonic-gate 				IRB_REFRELE(irb);
30360Sstevel@tonic-gate 				return (ire);
30370Sstevel@tonic-gate 			}
30380Sstevel@tonic-gate 			break;
30390Sstevel@tonic-gate 		}
30400Sstevel@tonic-gate 	}
30410Sstevel@tonic-gate 	IRB_REFRELE(irb);
30420Sstevel@tonic-gate 
30430Sstevel@tonic-gate 	return (save_ire);
30440Sstevel@tonic-gate }
3045