xref: /onnv-gate/usr/src/uts/common/inet/ip/tnet.c (revision 10934:e209937a4f19)
11676Sjpk /*
21676Sjpk  * CDDL HEADER START
31676Sjpk  *
41676Sjpk  * The contents of this file are subject to the terms of the
51676Sjpk  * Common Development and Distribution License (the "License").
61676Sjpk  * You may not use this file except in compliance with the License.
71676Sjpk  *
81676Sjpk  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
91676Sjpk  * or http://www.opensolaris.org/os/licensing.
101676Sjpk  * See the License for the specific language governing permissions
111676Sjpk  * and limitations under the License.
121676Sjpk  *
131676Sjpk  * When distributing Covered Code, include this CDDL HEADER in each
141676Sjpk  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
151676Sjpk  * If applicable, add the following below this CDDL HEADER, with the
161676Sjpk  * fields enclosed by brackets "[]" replaced with your own identifying
171676Sjpk  * information: Portions Copyright [yyyy] [name of copyright owner]
181676Sjpk  *
191676Sjpk  * CDDL HEADER END
201676Sjpk  */
211676Sjpk /*
228778SErik.Nordmark@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
231676Sjpk  * Use is subject to license terms.
241676Sjpk  */
251676Sjpk 
261676Sjpk #include <sys/types.h>
271676Sjpk #include <sys/stream.h>
281676Sjpk #include <sys/strsubr.h>
291676Sjpk #include <sys/stropts.h>
301676Sjpk #include <sys/sunddi.h>
311676Sjpk #include <sys/cred.h>
321676Sjpk #include <sys/debug.h>
331676Sjpk #include <sys/kmem.h>
341676Sjpk #include <sys/errno.h>
351676Sjpk #include <sys/disp.h>
361676Sjpk #include <netinet/in.h>
371676Sjpk #include <netinet/in_systm.h>
381676Sjpk #include <netinet/ip.h>
391676Sjpk #include <netinet/ip_icmp.h>
401676Sjpk #include <netinet/tcp.h>
411676Sjpk #include <inet/common.h>
421676Sjpk #include <inet/ipclassifier.h>
431676Sjpk #include <inet/ip.h>
441676Sjpk #include <inet/mib2.h>
451676Sjpk #include <inet/nd.h>
461676Sjpk #include <inet/tcp.h>
471676Sjpk #include <inet/ip_rts.h>
481676Sjpk #include <inet/ip_ire.h>
491676Sjpk #include <inet/ip_if.h>
501676Sjpk #include <sys/modhash.h>
511676Sjpk 
521676Sjpk #include <sys/tsol/label.h>
531676Sjpk #include <sys/tsol/label_macro.h>
541676Sjpk #include <sys/tsol/tnet.h>
551676Sjpk #include <sys/tsol/tndb.h>
561676Sjpk #include <sys/strsun.h>
571676Sjpk 
/*
 * Tunable for strict error-reply behavior (TCP RST and ICMP Unreachable).
 * It has no initializer, so as a file-scope object it starts out as zero.
 */
int tsol_strict_error;
601676Sjpk 
611676Sjpk /*
621676Sjpk  * Some notes on the Trusted Solaris IRE gateway security attributes:
631676Sjpk  *
641676Sjpk  * When running in Trusted mode, the routing subsystem determines whether or
651676Sjpk  * not a packet can be delivered to an off-link host (not directly reachable
661676Sjpk  * through an interface) based on the accreditation checks of the packet's
671676Sjpk  * security attributes against those associated with the next-hop gateway.
681676Sjpk  *
691676Sjpk  * The next-hop gateway's security attributes can be derived from two sources
701676Sjpk  * (in order of preference): route-related and the host database.  A Trusted
711676Sjpk  * system must be configured with at least the host database containing an
721676Sjpk  * entry for the next-hop gateway, or otherwise no accreditation checks can
731676Sjpk  * be performed, which may result in the inability to send packets to any
741676Sjpk  * off-link destination host.
751676Sjpk  *
761676Sjpk  * The major differences between the two sources are the number and type of
771676Sjpk  * security attributes used for accreditation checks.  A host database entry
781676Sjpk  * can contain at most one set of security attributes, specific only to the
791676Sjpk  * next-hop gateway.  In contrast, route-related security attributes are made
801676Sjpk  * up of a collection of security attributes for the distant networks, and
811676Sjpk  * are grouped together per next-hop gateway used to reach those networks.
821676Sjpk  * This is the preferred method, and the routing subsystem will fall back to
831676Sjpk  * the host database entry only if there are no route-related attributes
841676Sjpk  * associated with the next-hop gateway.
851676Sjpk  *
861676Sjpk  * In Trusted mode, all of the IRE entries (except LOCAL/LOOPBACK/BROADCAST/
871676Sjpk  * INTERFACE type) are initialized to contain a placeholder to store this
881676Sjpk  * information.  The ire_gw_secattr structure gets allocated, initialized
891676Sjpk  * and associated with the IRE during the time of the IRE creation.  The
901676Sjpk  * initialization process also includes resolving the host database entry
911676Sjpk  * of the next-hop gateway for fallback purposes.  It does not include any
921676Sjpk  * route-related attribute setup, as that process comes separately as part
931676Sjpk  * of the route requests (add/change) made to the routing subsystem.
941676Sjpk  *
951676Sjpk  * The underlying logic which involves associating IREs with the gateway
961676Sjpk  * security attributes is represented by the following data structures:
971676Sjpk  *
981676Sjpk  * tsol_gcdb_t, or "gcdb"
991676Sjpk  *
1001676Sjpk  *	- This is a system-wide collection of records containing the
1011676Sjpk  *	  currently used route-related security attributes, which are fed
1021676Sjpk  *	  through the routing socket interface, e.g. "route add/change".
1031676Sjpk  *
1041676Sjpk  * tsol_gc_t, or "gc"
1051676Sjpk  *
1061676Sjpk  *	- This is the gateway credential structure, and it provides for the
1071676Sjpk  *	  only mechanism to access the contents of gcdb.  More than one gc
1081676Sjpk  *	  entry may refer to the same gcdb record.  gc's in the system are
1091676Sjpk  *	  grouped according to the next-hop gateway address.
1101676Sjpk  *
1111676Sjpk  * tsol_gcgrp_t, or "gcgrp"
1121676Sjpk  *
1131676Sjpk  *	- Group of gateway credentials, and is unique per next-hop gateway
1141676Sjpk  *	  address.  When the group is not empty, i.e. when gcgrp_count is
1151676Sjpk  *	  greater than zero, it contains one or more gc's, each pointing to
1161676Sjpk  *	  a gcdb record which indicates the gateway security attributes
1171676Sjpk  *	  associated with the next-hop gateway.
1181676Sjpk  *
1191676Sjpk  * The fields of the tsol_ire_gw_secattr_t used from within the IRE are:
1201676Sjpk  *
1211676Sjpk  * igsa_lock
1221676Sjpk  *
1231676Sjpk  *	- Lock that protects all fields within tsol_ire_gw_secattr_t.
1241676Sjpk  *
1251676Sjpk  * igsa_rhc
1261676Sjpk  *
1271676Sjpk  *	- Remote host cache database entry of next-hop gateway.  This is
1281676Sjpk  *	  used in the case when there are no route-related attributes
1291676Sjpk  *	  configured for the IRE.
1301676Sjpk  *
1311676Sjpk  * igsa_gc
1321676Sjpk  *
1331676Sjpk  *	- A set of route-related attributes that only get set for prefix
1341676Sjpk  *	  IREs.  If this is non-NULL, the prefix IRE has been associated
1351676Sjpk  *	  with a set of gateway security attributes by way of route add/
1361676Sjpk  *	  change functionality.  This field stays NULL for IRE_CACHEs.
1371676Sjpk  *
1381676Sjpk  * igsa_gcgrp
1391676Sjpk  *
1401676Sjpk  *	- Group of gc's which only gets set for IRE_CACHEs.  Each of the gc
1411676Sjpk  *	  points to a gcdb record that contains the security attributes
1421676Sjpk  *	  used to perform the credential checks of the packet which uses
1431676Sjpk  *	  the IRE.  If the group is not empty, the list of gc's can be
1441676Sjpk  *	  traversed starting at gcgrp_head.  This field stays NULL for
1451676Sjpk  *	  prefix IREs.
1461676Sjpk  */
1471676Sjpk 
/*
 * kmem cache for tsol_ire_gw_secattr_t objects; created in tnet_init() and
 * used by ire_gw_secattr_alloc()/ire_gw_secattr_free().
 */
static kmem_cache_t *ire_gw_secattr_cache;

/* Initial values for gcdb_hash_size and gcgrp_hash_size below */
#define	GCDB_HASH_SIZE	101
#define	GCGRP_HASH_SIZE	101
1521676Sjpk 
/*
 * Drop one reference on gcdb record `p'.  gcdb_lock is taken around the
 * decrement of gcdb_refcnt; when the count reaches zero the record is handed
 * to gcdb_inactive() while the lock is still held, and the lock is released
 * afterwards.
 *
 * `p' is evaluated several times, so it must be free of side effects.  The
 * expansion is a bare brace block rather than a do/while(0) wrapper, so a
 * trailing semicolon produces an extra empty statement; take care when using
 * the macro as the unbraced body of an if that has an else clause.
 */
#define	GCDB_REFRELE(p) {		\
	mutex_enter(&gcdb_lock);	\
	ASSERT((p)->gcdb_refcnt > 0);	\
	if (--((p)->gcdb_refcnt) == 0)	\
		gcdb_inactive(p);	\
	ASSERT(MUTEX_HELD(&gcdb_lock));	\
	mutex_exit(&gcdb_lock);		\
}
1611676Sjpk 
/* Size arguments passed to mod_hash_create_extended() by tnet_init() */
static int gcdb_hash_size = GCDB_HASH_SIZE;
static int gcgrp_hash_size = GCGRP_HASH_SIZE;
/* gcdb records; hashed and compared by their security attributes */
static mod_hash_t *gcdb_hash;
/* gcgrp tables hashed by gateway address: AF_INET uses 4, AF_INET6 uses 6 */
static mod_hash_t *gcgrp4_hash;
static mod_hash_t *gcgrp6_hash;

/* Held across gcdb_hash lookups/inserts/removals and gcdb_refcnt changes */
static kmutex_t gcdb_lock;
/* Held across gcgrp hash lookups/inserts/removals; has external linkage */
kmutex_t gcgrp_lock;
1701676Sjpk 
/* gcdb_hash callbacks and gcdb record management */
static uint_t gcdb_hash_by_secattr(void *, mod_hash_key_t);
static int gcdb_hash_cmp(mod_hash_key_t, mod_hash_key_t);
static tsol_gcdb_t *gcdb_lookup(struct rtsa_s *, boolean_t);
static void gcdb_inactive(tsol_gcdb_t *);

/* gcgrp4_hash/gcgrp6_hash callbacks */
static uint_t gcgrp_hash_by_addr(void *, mod_hash_key_t);
static int gcgrp_hash_cmp(mod_hash_key_t, mod_hash_key_t);

/* ire_gw_secattr_cache constructor/destructor */
static int ire_gw_secattr_constructor(void *, void *, int);
static void ire_gw_secattr_destructor(void *, void *);
1811676Sjpk 
1821676Sjpk void
1831676Sjpk tnet_init(void)
1841676Sjpk {
1851676Sjpk 	ire_gw_secattr_cache = kmem_cache_create("ire_gw_secattr_cache",
1861676Sjpk 	    sizeof (tsol_ire_gw_secattr_t), 64, ire_gw_secattr_constructor,
1871676Sjpk 	    ire_gw_secattr_destructor, NULL, NULL, NULL, 0);
1881676Sjpk 
1891676Sjpk 	gcdb_hash = mod_hash_create_extended("gcdb_hash",
1901676Sjpk 	    gcdb_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor,
1911676Sjpk 	    gcdb_hash_by_secattr, NULL, gcdb_hash_cmp, KM_SLEEP);
1921676Sjpk 
1931676Sjpk 	gcgrp4_hash = mod_hash_create_extended("gcgrp4_hash",
1941676Sjpk 	    gcgrp_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor,
1951676Sjpk 	    gcgrp_hash_by_addr, NULL, gcgrp_hash_cmp, KM_SLEEP);
1961676Sjpk 
1971676Sjpk 	gcgrp6_hash = mod_hash_create_extended("gcgrp6_hash",
1981676Sjpk 	    gcgrp_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor,
1991676Sjpk 	    gcgrp_hash_by_addr, NULL, gcgrp_hash_cmp, KM_SLEEP);
2001676Sjpk 
2011676Sjpk 	mutex_init(&gcdb_lock, NULL, MUTEX_DEFAULT, NULL);
2021676Sjpk 	mutex_init(&gcgrp_lock, NULL, MUTEX_DEFAULT, NULL);
2031676Sjpk }
2041676Sjpk 
2051676Sjpk void
2061676Sjpk tnet_fini(void)
2071676Sjpk {
2081676Sjpk 	kmem_cache_destroy(ire_gw_secattr_cache);
2091676Sjpk 	mod_hash_destroy_hash(gcdb_hash);
2101676Sjpk 	mod_hash_destroy_hash(gcgrp4_hash);
2111676Sjpk 	mod_hash_destroy_hash(gcgrp6_hash);
2121676Sjpk 	mutex_destroy(&gcdb_lock);
2131676Sjpk 	mutex_destroy(&gcgrp_lock);
2141676Sjpk }
2151676Sjpk 
2161676Sjpk /* ARGSUSED */
2171676Sjpk static int
2181676Sjpk ire_gw_secattr_constructor(void *buf, void *cdrarg, int kmflags)
2191676Sjpk {
2201676Sjpk 	tsol_ire_gw_secattr_t *attrp = buf;
2211676Sjpk 
2221676Sjpk 	mutex_init(&attrp->igsa_lock, NULL, MUTEX_DEFAULT, NULL);
2231676Sjpk 
2241676Sjpk 	attrp->igsa_rhc = NULL;
2251676Sjpk 	attrp->igsa_gc = NULL;
2261676Sjpk 	attrp->igsa_gcgrp = NULL;
2271676Sjpk 
2281676Sjpk 	return (0);
2291676Sjpk }
2301676Sjpk 
2311676Sjpk /* ARGSUSED */
2321676Sjpk static void
2331676Sjpk ire_gw_secattr_destructor(void *buf, void *cdrarg)
2341676Sjpk {
2351676Sjpk 	tsol_ire_gw_secattr_t *attrp = (tsol_ire_gw_secattr_t *)buf;
2361676Sjpk 
2371676Sjpk 	mutex_destroy(&attrp->igsa_lock);
2381676Sjpk }
2391676Sjpk 
2401676Sjpk tsol_ire_gw_secattr_t *
2411676Sjpk ire_gw_secattr_alloc(int kmflags)
2421676Sjpk {
2431676Sjpk 	return (kmem_cache_alloc(ire_gw_secattr_cache, kmflags));
2441676Sjpk }
2451676Sjpk 
/*
 * Release everything an IRE gateway security attribute structure refers to
 * and return the structure to ire_gw_secattr_cache.
 *
 * Each non-NULL reference (igsa_rhc, igsa_gc, igsa_gcgrp) is dropped through
 * its release macro and the field is reset to NULL, so the buffer goes back
 * to the cache in the same state the constructor leaves it in.  The calling
 * thread must not hold igsa_lock.
 */
void
ire_gw_secattr_free(tsol_ire_gw_secattr_t *attrp)
{
	ASSERT(MUTEX_NOT_HELD(&attrp->igsa_lock));

	/* Remote host cache entry of the next-hop gateway, if any */
	if (attrp->igsa_rhc != NULL) {
		TNRHC_RELE(attrp->igsa_rhc);
		attrp->igsa_rhc = NULL;
	}

	/* Route-related attributes (set for prefix IREs), if any */
	if (attrp->igsa_gc != NULL) {
		GC_REFRELE(attrp->igsa_gc);
		attrp->igsa_gc = NULL;
	}
	/* Gateway credential group (set for IRE_CACHEs), if any */
	if (attrp->igsa_gcgrp != NULL) {
		GCGRP_REFRELE(attrp->igsa_gcgrp);
		attrp->igsa_gcgrp = NULL;
	}

	/* The cached buffer must look freshly constructed. */
	ASSERT(attrp->igsa_rhc == NULL);
	ASSERT(attrp->igsa_gc == NULL);
	ASSERT(attrp->igsa_gcgrp == NULL);

	kmem_cache_free(ire_gw_secattr_cache, attrp);
}
2711676Sjpk 
2721676Sjpk /* ARGSUSED */
2731676Sjpk static uint_t
2741676Sjpk gcdb_hash_by_secattr(void *hash_data, mod_hash_key_t key)
2751676Sjpk {
2761676Sjpk 	const struct rtsa_s *rp = (struct rtsa_s *)key;
2771676Sjpk 	const uint32_t *up, *ue;
2781676Sjpk 	uint_t hash;
2791676Sjpk 	int i;
2801676Sjpk 
2811676Sjpk 	ASSERT(rp != NULL);
2821676Sjpk 
2831676Sjpk 	/* See comments in hash_bylabel in zone.c for details */
2841676Sjpk 	hash = rp->rtsa_doi + (rp->rtsa_doi << 1);
2851676Sjpk 	up = (const uint32_t *)&rp->rtsa_slrange;
2861676Sjpk 	ue = up + sizeof (rp->rtsa_slrange) / sizeof (*up);
2871676Sjpk 	i = 1;
2881676Sjpk 	while (up < ue) {
2891676Sjpk 		/* using 2^n + 1, 1 <= n <= 16 as source of many primes */
2901676Sjpk 		hash += *up + (*up << ((i % 16) + 1));
2911676Sjpk 		up++;
2921676Sjpk 		i++;
2931676Sjpk 	}
2941676Sjpk 	return (hash);
2951676Sjpk }
2961676Sjpk 
2971676Sjpk static int
2981676Sjpk gcdb_hash_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
2991676Sjpk {
3001676Sjpk 	struct rtsa_s *rp1 = (struct rtsa_s *)key1;
3011676Sjpk 	struct rtsa_s *rp2 = (struct rtsa_s *)key2;
3021676Sjpk 
3031676Sjpk 	ASSERT(rp1 != NULL && rp2 != NULL);
3041676Sjpk 
3051676Sjpk 	if (blequal(&rp1->rtsa_slrange.lower_bound,
3061676Sjpk 	    &rp2->rtsa_slrange.lower_bound) &&
3071676Sjpk 	    blequal(&rp1->rtsa_slrange.upper_bound,
3081676Sjpk 	    &rp2->rtsa_slrange.upper_bound) &&
3091676Sjpk 	    rp1->rtsa_doi == rp2->rtsa_doi)
3101676Sjpk 		return (0);
3111676Sjpk 
3121676Sjpk 	/* No match; not found */
3131676Sjpk 	return (-1);
3141676Sjpk }
3151676Sjpk 
3161676Sjpk /* ARGSUSED */
3171676Sjpk static uint_t
3181676Sjpk gcgrp_hash_by_addr(void *hash_data, mod_hash_key_t key)
3191676Sjpk {
3201676Sjpk 	tsol_gcgrp_addr_t *ga = (tsol_gcgrp_addr_t *)key;
3211676Sjpk 	uint_t		idx = 0;
3221676Sjpk 	uint32_t	*ap;
3231676Sjpk 
3241676Sjpk 	ASSERT(ga != NULL);
3251676Sjpk 	ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6);
3261676Sjpk 
3271676Sjpk 	ap = (uint32_t *)&ga->ga_addr.s6_addr32[0];
3281676Sjpk 	idx ^= *ap++;
3291676Sjpk 	idx ^= *ap++;
3301676Sjpk 	idx ^= *ap++;
3311676Sjpk 	idx ^= *ap;
3321676Sjpk 
3331676Sjpk 	return (idx);
3341676Sjpk }
3351676Sjpk 
3361676Sjpk static int
3371676Sjpk gcgrp_hash_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
3381676Sjpk {
3391676Sjpk 	tsol_gcgrp_addr_t *ga1 = (tsol_gcgrp_addr_t *)key1;
3401676Sjpk 	tsol_gcgrp_addr_t *ga2 = (tsol_gcgrp_addr_t *)key2;
3411676Sjpk 
3421676Sjpk 	ASSERT(ga1 != NULL && ga2 != NULL);
3431676Sjpk 
3441676Sjpk 	/* Address family must match */
3451676Sjpk 	if (ga1->ga_af != ga2->ga_af)
3461676Sjpk 		return (-1);
3471676Sjpk 
3481676Sjpk 	if (ga1->ga_addr.s6_addr32[0] == ga2->ga_addr.s6_addr32[0] &&
3491676Sjpk 	    ga1->ga_addr.s6_addr32[1] == ga2->ga_addr.s6_addr32[1] &&
3501676Sjpk 	    ga1->ga_addr.s6_addr32[2] == ga2->ga_addr.s6_addr32[2] &&
3511676Sjpk 	    ga1->ga_addr.s6_addr32[3] == ga2->ga_addr.s6_addr32[3])
3521676Sjpk 		return (0);
3531676Sjpk 
3541676Sjpk 	/* No match; not found */
3551676Sjpk 	return (-1);
3561676Sjpk }
3571676Sjpk 
/*
 * NOTE(review): the layout of this string (a leading \20 followed by
 * <bit number><name> pairs) looks like a "%b"-style bit-name description,
 * presumably for rtsa_mask.  No user of it is visible in this part of the
 * file -- confirm before relying on that.
 */
#define	RTSAFLAGS	"\20\11cipso\3doi\2max_sl\1min_sl"
3591676Sjpk 
3601676Sjpk int
3611676Sjpk rtsa_validate(const struct rtsa_s *rp)
3621676Sjpk {
3631676Sjpk 	uint32_t mask = rp->rtsa_mask;
3641676Sjpk 
3651676Sjpk 	/* RTSA_CIPSO must be set, and DOI must not be zero */
3661676Sjpk 	if ((mask & RTSA_CIPSO) == 0 || rp->rtsa_doi == 0) {
3671676Sjpk 		DTRACE_PROBE2(tx__gcdb__log__error__rtsa__validate, char *,
3681676Sjpk 		    "rtsa(1) lacks flag or has 0 doi.",
3691676Sjpk 		    rtsa_s *, rp);
3701676Sjpk 		return (EINVAL);
3711676Sjpk 	}
3721676Sjpk 	/*
3731676Sjpk 	 * SL range must be specified, and it must have its
3741676Sjpk 	 * upper bound dominating its lower bound.
3751676Sjpk 	 */
3761676Sjpk 	if ((mask & RTSA_SLRANGE) != RTSA_SLRANGE ||
3771676Sjpk 	    !bldominates(&rp->rtsa_slrange.upper_bound,
3781676Sjpk 	    &rp->rtsa_slrange.lower_bound)) {
3791676Sjpk 		DTRACE_PROBE2(tx__gcdb__log__error__rtsa__validate, char *,
3801676Sjpk 		    "rtsa(1) min_sl and max_sl not set or max_sl is "
3811676Sjpk 		    "not dominating.", rtsa_s *, rp);
3821676Sjpk 		return (EINVAL);
3831676Sjpk 	}
3841676Sjpk 	return (0);
3851676Sjpk }
3861676Sjpk 
3871676Sjpk /*
3881676Sjpk  * A brief explanation of the reference counting scheme:
3891676Sjpk  *
3901676Sjpk  * Prefix IREs have a non-NULL igsa_gc and a NULL igsa_gcgrp;
3911676Sjpk  * IRE_CACHEs have it vice-versa.
3921676Sjpk  *
3931676Sjpk  * Apart from dynamic references due to reference holds done
3941676Sjpk  * actively by threads, we have the following references:
3951676Sjpk  *
3961676Sjpk  * gcdb_refcnt:
3971676Sjpk  *	- Every tsol_gc_t pointing to a tsol_gcdb_t contributes a reference
3981676Sjpk  *	  to the gcdb_refcnt.
3991676Sjpk  *
4001676Sjpk  * gc_refcnt:
4011676Sjpk  *	- A prefix IRE that points to an igsa_gc contributes a reference
4021676Sjpk  *	  to the gc_refcnt.
4031676Sjpk  *
4041676Sjpk  * gcgrp_refcnt:
4051676Sjpk  *	- An IRE_CACHE that points to an igsa_gcgrp contributes a reference
4061676Sjpk  *	  to the gcgrp_refcnt of the associated tsol_gcgrp_t.
4071676Sjpk  *	- Every tsol_gc_t in the chain headed by tsol_gcgrp_t contributes
4081676Sjpk  *	  a reference to the gcgrp_refcnt.
4091676Sjpk  */
4101676Sjpk static tsol_gcdb_t *
4111676Sjpk gcdb_lookup(struct rtsa_s *rp, boolean_t alloc)
4121676Sjpk {
4131676Sjpk 	tsol_gcdb_t *gcdb = NULL;
4141676Sjpk 
4151676Sjpk 	if (rtsa_validate(rp) != 0)
4161676Sjpk 		return (NULL);
4171676Sjpk 
4181676Sjpk 	mutex_enter(&gcdb_lock);
4191676Sjpk 	/* Find a copy in the cache; otherwise, create one and cache it */
4201676Sjpk 	if (mod_hash_find(gcdb_hash, (mod_hash_key_t)rp,
4211676Sjpk 	    (mod_hash_val_t *)&gcdb) == 0) {
4221676Sjpk 		gcdb->gcdb_refcnt++;
4231676Sjpk 		ASSERT(gcdb->gcdb_refcnt != 0);
4241676Sjpk 
4251676Sjpk 		DTRACE_PROBE2(tx__gcdb__log__info__gcdb__lookup, char *,
4261676Sjpk 		    "gcdb(1) is in gcdb_hash(global)", tsol_gcdb_t *, gcdb);
4271676Sjpk 	} else if (alloc) {
4281676Sjpk 		gcdb = kmem_zalloc(sizeof (*gcdb), KM_NOSLEEP);
4291676Sjpk 		if (gcdb != NULL) {
4301676Sjpk 			gcdb->gcdb_refcnt = 1;
4311676Sjpk 			gcdb->gcdb_mask = rp->rtsa_mask;
4321676Sjpk 			gcdb->gcdb_doi = rp->rtsa_doi;
4331676Sjpk 			gcdb->gcdb_slrange = rp->rtsa_slrange;
4341676Sjpk 
4351676Sjpk 			if (mod_hash_insert(gcdb_hash,
4361676Sjpk 			    (mod_hash_key_t)&gcdb->gcdb_attr,
4371676Sjpk 			    (mod_hash_val_t)gcdb) != 0) {
4381676Sjpk 				mutex_exit(&gcdb_lock);
4391676Sjpk 				kmem_free(gcdb, sizeof (*gcdb));
4401676Sjpk 				return (NULL);
4411676Sjpk 			}
4421676Sjpk 
4431676Sjpk 			DTRACE_PROBE2(tx__gcdb__log__info__gcdb__insert, char *,
4441676Sjpk 			    "gcdb(1) inserted in gcdb_hash(global)",
4451676Sjpk 			    tsol_gcdb_t *, gcdb);
4461676Sjpk 		}
4471676Sjpk 	}
4481676Sjpk 	mutex_exit(&gcdb_lock);
4491676Sjpk 	return (gcdb);
4501676Sjpk }
4511676Sjpk 
/*
 * Dispose of a gcdb record whose reference count has reached zero: remove
 * it from gcdb_hash and free it.  The caller must hold gcdb_lock;
 * GCDB_REFRELE() calls this with the lock held and releases it afterwards.
 */
static void
gcdb_inactive(tsol_gcdb_t *gcdb)
{
	ASSERT(MUTEX_HELD(&gcdb_lock));
	ASSERT(gcdb != NULL && gcdb->gcdb_refcnt == 0);

	/*
	 * The removed value is stored back through &gcdb, so the pointer
	 * freed below is whatever the hash hands back.  NOTE(review):
	 * presumably gcdb is left untouched if the removal fails -- confirm
	 * against mod_hash_remove().
	 */
	(void) mod_hash_remove(gcdb_hash, (mod_hash_key_t)&gcdb->gcdb_attr,
	    (mod_hash_val_t *)&gcdb);

	DTRACE_PROBE2(tx__gcdb__log__info__gcdb__remove, char *,
	    "gcdb(1) removed from gcdb_hash(global)",
	    tsol_gcdb_t *, gcdb);
	kmem_free(gcdb, sizeof (*gcdb));
}
4661676Sjpk 
4671676Sjpk tsol_gc_t *
4681676Sjpk gc_create(struct rtsa_s *rp, tsol_gcgrp_t *gcgrp, boolean_t *gcgrp_xtrarefp)
4691676Sjpk {
4701676Sjpk 	tsol_gc_t *gc;
4711676Sjpk 	tsol_gcdb_t *gcdb;
4721676Sjpk 
4731676Sjpk 	*gcgrp_xtrarefp = B_TRUE;
4741676Sjpk 
4751676Sjpk 	rw_enter(&gcgrp->gcgrp_rwlock, RW_WRITER);
4761676Sjpk 	if ((gcdb = gcdb_lookup(rp, B_TRUE)) == NULL) {
4771676Sjpk 		rw_exit(&gcgrp->gcgrp_rwlock);
4781676Sjpk 		return (NULL);
4791676Sjpk 	}
4801676Sjpk 
4811676Sjpk 	for (gc = gcgrp->gcgrp_head; gc != NULL; gc = gc->gc_next) {
4821676Sjpk 		if (gc->gc_db == gcdb) {
4831676Sjpk 			ASSERT(gc->gc_grp == gcgrp);
4841676Sjpk 
4851676Sjpk 			gc->gc_refcnt++;
4861676Sjpk 			ASSERT(gc->gc_refcnt != 0);
4871676Sjpk 
4881676Sjpk 			GCDB_REFRELE(gcdb);
4891676Sjpk 
4901676Sjpk 			DTRACE_PROBE3(tx__gcdb__log__info__gc__create,
4911676Sjpk 			    char *, "found gc(1) in gcgrp(2)",
4921676Sjpk 			    tsol_gc_t *, gc, tsol_gcgrp_t *, gcgrp);
4931676Sjpk 			rw_exit(&gcgrp->gcgrp_rwlock);
4941676Sjpk 			return (gc);
4951676Sjpk 		}
4961676Sjpk 	}
4971676Sjpk 
4981676Sjpk 	gc = kmem_zalloc(sizeof (*gc), KM_NOSLEEP);
4991676Sjpk 	if (gc != NULL) {
5001676Sjpk 		if (gcgrp->gcgrp_head == NULL) {
5011676Sjpk 			gcgrp->gcgrp_head = gcgrp->gcgrp_tail = gc;
5021676Sjpk 		} else {
5031676Sjpk 			gcgrp->gcgrp_tail->gc_next = gc;
5041676Sjpk 			gc->gc_prev = gcgrp->gcgrp_tail;
5051676Sjpk 			gcgrp->gcgrp_tail = gc;
5061676Sjpk 		}
5071676Sjpk 		gcgrp->gcgrp_count++;
5081676Sjpk 		ASSERT(gcgrp->gcgrp_count != 0);
5091676Sjpk 
5101676Sjpk 		/* caller has incremented gcgrp reference for us */
5111676Sjpk 		gc->gc_grp = gcgrp;
5121676Sjpk 
5131676Sjpk 		gc->gc_db = gcdb;
5141676Sjpk 		gc->gc_refcnt = 1;
5151676Sjpk 
5161676Sjpk 		DTRACE_PROBE3(tx__gcdb__log__info__gc__create, char *,
5171676Sjpk 		    "added gc(1) to gcgrp(2)", tsol_gc_t *, gc,
5181676Sjpk 		    tsol_gcgrp_t *, gcgrp);
5191676Sjpk 
5201676Sjpk 		*gcgrp_xtrarefp = B_FALSE;
5211676Sjpk 	}
5221676Sjpk 	rw_exit(&gcgrp->gcgrp_rwlock);
5231676Sjpk 
5241676Sjpk 	return (gc);
5251676Sjpk }
5261676Sjpk 
5271676Sjpk void
5281676Sjpk gc_inactive(tsol_gc_t *gc)
5291676Sjpk {
5301676Sjpk 	tsol_gcgrp_t *gcgrp = gc->gc_grp;
5311676Sjpk 
5321676Sjpk 	ASSERT(gcgrp != NULL);
5331676Sjpk 	ASSERT(RW_WRITE_HELD(&gcgrp->gcgrp_rwlock));
5341676Sjpk 	ASSERT(gc->gc_refcnt == 0);
5351676Sjpk 
5361676Sjpk 	if (gc->gc_prev != NULL)
5371676Sjpk 		gc->gc_prev->gc_next = gc->gc_next;
5381676Sjpk 	else
5391676Sjpk 		gcgrp->gcgrp_head = gc->gc_next;
5401676Sjpk 	if (gc->gc_next != NULL)
5411676Sjpk 		gc->gc_next->gc_prev = gc->gc_prev;
5421676Sjpk 	else
5431676Sjpk 		gcgrp->gcgrp_tail = gc->gc_prev;
5441676Sjpk 	ASSERT(gcgrp->gcgrp_count > 0);
5451676Sjpk 	gcgrp->gcgrp_count--;
5461676Sjpk 
5471676Sjpk 	/* drop lock before it's destroyed */
5481676Sjpk 	rw_exit(&gcgrp->gcgrp_rwlock);
5491676Sjpk 
5501676Sjpk 	DTRACE_PROBE3(tx__gcdb__log__info__gc__remove, char *,
5511676Sjpk 	    "removed inactive gc(1) from gcgrp(2)",
5521676Sjpk 	    tsol_gc_t *, gc, tsol_gcgrp_t *, gcgrp);
5531676Sjpk 
5541676Sjpk 	GCGRP_REFRELE(gcgrp);
5551676Sjpk 
5561676Sjpk 	gc->gc_grp = NULL;
5571676Sjpk 	gc->gc_prev = gc->gc_next = NULL;
5581676Sjpk 
5591676Sjpk 	if (gc->gc_db != NULL)
5601676Sjpk 		GCDB_REFRELE(gc->gc_db);
5611676Sjpk 
5621676Sjpk 	kmem_free(gc, sizeof (*gc));
5631676Sjpk }
5641676Sjpk 
5651676Sjpk tsol_gcgrp_t *
5661676Sjpk gcgrp_lookup(tsol_gcgrp_addr_t *ga, boolean_t alloc)
5671676Sjpk {
5681676Sjpk 	tsol_gcgrp_t *gcgrp = NULL;
5691676Sjpk 	mod_hash_t *hashp;
5701676Sjpk 
5711676Sjpk 	ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6);
5721676Sjpk 
5731676Sjpk 	hashp = (ga->ga_af == AF_INET) ? gcgrp4_hash : gcgrp6_hash;
5741676Sjpk 
5751676Sjpk 	mutex_enter(&gcgrp_lock);
5761676Sjpk 	if (mod_hash_find(hashp, (mod_hash_key_t)ga,
5771676Sjpk 	    (mod_hash_val_t *)&gcgrp) == 0) {
5781676Sjpk 		gcgrp->gcgrp_refcnt++;
5791676Sjpk 		ASSERT(gcgrp->gcgrp_refcnt != 0);
5801676Sjpk 
5811676Sjpk 		DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__lookup, char *,
5821676Sjpk 		    "found gcgrp(1) in hash(2)", tsol_gcgrp_t *, gcgrp,
5831676Sjpk 		    mod_hash_t *, hashp);
5841676Sjpk 
5851676Sjpk 	} else if (alloc) {
5861676Sjpk 		gcgrp = kmem_zalloc(sizeof (*gcgrp), KM_NOSLEEP);
5871676Sjpk 		if (gcgrp != NULL) {
5881676Sjpk 			gcgrp->gcgrp_refcnt = 1;
5891676Sjpk 			rw_init(&gcgrp->gcgrp_rwlock, NULL, RW_DEFAULT, NULL);
5901676Sjpk 			bcopy(ga, &gcgrp->gcgrp_addr, sizeof (*ga));
5911676Sjpk 
5921676Sjpk 			if (mod_hash_insert(hashp,
5931676Sjpk 			    (mod_hash_key_t)&gcgrp->gcgrp_addr,
5941676Sjpk 			    (mod_hash_val_t)gcgrp) != 0) {
5951676Sjpk 				mutex_exit(&gcgrp_lock);
5961676Sjpk 				kmem_free(gcgrp, sizeof (*gcgrp));
5971676Sjpk 				return (NULL);
5981676Sjpk 			}
5991676Sjpk 
6001676Sjpk 			DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__insert,
6011676Sjpk 			    char *, "inserted gcgrp(1) in hash(2)",
6021676Sjpk 			    tsol_gcgrp_t *, gcgrp, mod_hash_t *, hashp);
6031676Sjpk 		}
6041676Sjpk 	}
6051676Sjpk 	mutex_exit(&gcgrp_lock);
6061676Sjpk 	return (gcgrp);
6071676Sjpk }
6081676Sjpk 
6091676Sjpk void
6101676Sjpk gcgrp_inactive(tsol_gcgrp_t *gcgrp)
6111676Sjpk {
6121676Sjpk 	tsol_gcgrp_addr_t *ga;
6131676Sjpk 	mod_hash_t *hashp;
6141676Sjpk 
6151676Sjpk 	ASSERT(MUTEX_HELD(&gcgrp_lock));
6161676Sjpk 	ASSERT(!RW_LOCK_HELD(&gcgrp->gcgrp_rwlock));
6171676Sjpk 	ASSERT(gcgrp != NULL && gcgrp->gcgrp_refcnt == 0);
6181676Sjpk 	ASSERT(gcgrp->gcgrp_head == NULL && gcgrp->gcgrp_count == 0);
6191676Sjpk 
6201676Sjpk 	ga = &gcgrp->gcgrp_addr;
6211676Sjpk 	ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6);
6221676Sjpk 
6231676Sjpk 	hashp = (ga->ga_af == AF_INET) ? gcgrp4_hash : gcgrp6_hash;
6241676Sjpk 	(void) mod_hash_remove(hashp, (mod_hash_key_t)ga,
6251676Sjpk 	    (mod_hash_val_t *)&gcgrp);
6261676Sjpk 	rw_destroy(&gcgrp->gcgrp_rwlock);
6271676Sjpk 
6281676Sjpk 	DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__remove, char *,
6291676Sjpk 	    "removed inactive gcgrp(1) from hash(2)",
6301676Sjpk 	    tsol_gcgrp_t *, gcgrp, mod_hash_t *, hashp);
6311676Sjpk 
6321676Sjpk 	kmem_free(gcgrp, sizeof (*gcgrp));
6331676Sjpk }
6341676Sjpk 
635*10934Ssommerfeld@sun.com 
636*10934Ssommerfeld@sun.com /*
637*10934Ssommerfeld@sun.com  * Assign a sensitivity label to inbound traffic which arrived without
638*10934Ssommerfeld@sun.com  * an explicit on-the-wire label.
639*10934Ssommerfeld@sun.com  *
640*10934Ssommerfeld@sun.com  * In the case of CIPSO-type hosts, we assume packets arriving without
641*10934Ssommerfeld@sun.com  * a label are at the most sensitive label known for the host, most
642*10934Ssommerfeld@sun.com  * likely involving out-of-band key management traffic (such as IKE,
643*10934Ssommerfeld@sun.com  * etc.,)
644*10934Ssommerfeld@sun.com  */
645*10934Ssommerfeld@sun.com static boolean_t
646*10934Ssommerfeld@sun.com tsol_find_unlabeled_label(tsol_tpc_t *rhtp, bslabel_t *sl, uint32_t *doi)
647*10934Ssommerfeld@sun.com {
648*10934Ssommerfeld@sun.com 	*doi = rhtp->tpc_tp.tp_doi;
649*10934Ssommerfeld@sun.com 	switch (rhtp->tpc_tp.host_type) {
650*10934Ssommerfeld@sun.com 	case UNLABELED:
651*10934Ssommerfeld@sun.com 		*sl = rhtp->tpc_tp.tp_def_label;
652*10934Ssommerfeld@sun.com 		break;
653*10934Ssommerfeld@sun.com 	case SUN_CIPSO:
654*10934Ssommerfeld@sun.com 		*sl = rhtp->tpc_tp.tp_sl_range_cipso.upper_bound;
655*10934Ssommerfeld@sun.com 		break;
656*10934Ssommerfeld@sun.com 	default:
657*10934Ssommerfeld@sun.com 		return (B_FALSE);
658*10934Ssommerfeld@sun.com 	}
659*10934Ssommerfeld@sun.com 	setbltype(sl, SUN_SL_ID);
660*10934Ssommerfeld@sun.com 	return (B_TRUE);
661*10934Ssommerfeld@sun.com }
662*10934Ssommerfeld@sun.com 
6631676Sjpk /*
6641676Sjpk  * Converts CIPSO option to sensitivity label.
6651676Sjpk  * Validity checks based on restrictions defined in
6661676Sjpk  * COMMERCIAL IP SECURITY OPTION (CIPSO 2.2) (draft-ietf-cipso-ipsecurity)
6671676Sjpk  */
6681676Sjpk static boolean_t
6691676Sjpk cipso_to_sl(const uchar_t *option, bslabel_t *sl)
6701676Sjpk {
6711676Sjpk 	const struct cipso_option *co = (const struct cipso_option *)option;
6721676Sjpk 	const struct cipso_tag_type_1 *tt1;
6731676Sjpk 
6741676Sjpk 	tt1 = (struct cipso_tag_type_1 *)&co->cipso_tag_type[0];
6751676Sjpk 	if (tt1->tag_type != 1 ||
6761676Sjpk 	    tt1->tag_length < TSOL_TT1_MIN_LENGTH ||
6771676Sjpk 	    tt1->tag_length > TSOL_TT1_MAX_LENGTH ||
6781676Sjpk 	    tt1->tag_length + TSOL_CIPSO_TAG_OFFSET > co->cipso_length)
6791676Sjpk 		return (B_FALSE);
6801676Sjpk 
6811676Sjpk 	bsllow(sl);	/* assumed: sets compartments to all zeroes */
6821676Sjpk 	LCLASS_SET((_bslabel_impl_t *)sl, tt1->tag_sl);
6831676Sjpk 	bcopy(tt1->tag_cat, &((_bslabel_impl_t *)sl)->compartments,
6841676Sjpk 	    tt1->tag_length - TSOL_TT1_MIN_LENGTH);
6851676Sjpk 	return (B_TRUE);
6861676Sjpk }
6871676Sjpk 
6881676Sjpk /*
689*10934Ssommerfeld@sun.com  * If present, parse a CIPSO label in the incoming packet and
690*10934Ssommerfeld@sun.com  * construct a ts_label_t that reflects the CIPSO label and attach it
691*10934Ssommerfeld@sun.com  * to the dblk cred.  Later as the mblk flows up through the stack any
692*10934Ssommerfeld@sun.com  * code that needs to examine the packet label can inspect the label
693*10934Ssommerfeld@sun.com  * from the dblk cred. This function is called right in ip_rput for
694*10934Ssommerfeld@sun.com  * all packets, i.e. locally destined and to be forwarded packets. The
695*10934Ssommerfeld@sun.com  * forwarding path needs to examine the label to determine how to
696*10934Ssommerfeld@sun.com  * forward the packet.
6971676Sjpk  *
69810181SKen.Powell@Sun.COM  * This routine pulls all message text up into the first mblk.
69910181SKen.Powell@Sun.COM  * For IPv4, only the first 20 bytes of the IP header are guaranteed
70010181SKen.Powell@Sun.COM  * to exist. For IPv6, only the IPv6 header is guaranteed to exist.
7011676Sjpk  */
7021676Sjpk boolean_t
7031676Sjpk tsol_get_pkt_label(mblk_t *mp, int version)
7041676Sjpk {
705*10934Ssommerfeld@sun.com 	tsol_tpc_t	*src_rhtp = NULL;
7061676Sjpk 	uchar_t		*opt_ptr = NULL;
7071676Sjpk 	const ipha_t	*ipha;
7081676Sjpk 	bslabel_t	sl;
7091676Sjpk 	uint32_t	doi;
7101676Sjpk 	tsol_ip_label_t	label_type;
711*10934Ssommerfeld@sun.com 	uint32_t	label_flags = 0; /* flags to set in label */
7121676Sjpk 	const cipso_option_t *co;
7131676Sjpk 	const void	*src;
7141676Sjpk 	const ip6_t	*ip6h;
7158778SErik.Nordmark@Sun.COM 	cred_t		*credp;
7168778SErik.Nordmark@Sun.COM 	pid_t		cpid;
717*10934Ssommerfeld@sun.com 	int 		proto;
7181676Sjpk 
7191676Sjpk 	ASSERT(DB_TYPE(mp) == M_DATA);
7201676Sjpk 
7219710SKen.Powell@Sun.COM 	if (mp->b_cont != NULL && !pullupmsg(mp, -1))
7229710SKen.Powell@Sun.COM 		return (B_FALSE);
7239710SKen.Powell@Sun.COM 
7241676Sjpk 	if (version == IPV4_VERSION) {
72510181SKen.Powell@Sun.COM 		ASSERT(MBLKL(mp) >= IP_SIMPLE_HDR_LENGTH);
7261676Sjpk 		ipha = (const ipha_t *)mp->b_rptr;
7271676Sjpk 		src = &ipha->ipha_src;
72810181SKen.Powell@Sun.COM 		if (!tsol_get_option_v4(mp, &label_type, &opt_ptr))
72910181SKen.Powell@Sun.COM 			return (B_FALSE);
7301676Sjpk 	} else {
73110181SKen.Powell@Sun.COM 		ASSERT(MBLKL(mp) >= IPV6_HDR_LEN);
7321676Sjpk 		ip6h = (const ip6_t *)mp->b_rptr;
7331676Sjpk 		src = &ip6h->ip6_src;
73410181SKen.Powell@Sun.COM 		if (!tsol_get_option_v6(mp, &label_type, &opt_ptr))
73510181SKen.Powell@Sun.COM 			return (B_FALSE);
7361676Sjpk 	}
7371676Sjpk 
7381676Sjpk 	switch (label_type) {
7391676Sjpk 	case OPT_CIPSO:
7401676Sjpk 		/*
7411676Sjpk 		 * Convert the CIPSO label to the internal format
7421676Sjpk 		 * and attach it to the dblk cred.
7431676Sjpk 		 * Validity checks based on restrictions defined in
7441676Sjpk 		 * COMMERCIAL IP SECURITY OPTION (CIPSO 2.2)
7451676Sjpk 		 * (draft-ietf-cipso-ipsecurity)
7461676Sjpk 		 */
7471676Sjpk 		if (version == IPV6_VERSION && ip6opt_ls == 0)
7481676Sjpk 			return (B_FALSE);
7491676Sjpk 		co = (const struct cipso_option *)opt_ptr;
7501676Sjpk 		if ((co->cipso_length <
7511676Sjpk 		    TSOL_CIPSO_TAG_OFFSET + TSOL_TT1_MIN_LENGTH) ||
7521676Sjpk 		    (co->cipso_length > IP_MAX_OPT_LENGTH))
7531676Sjpk 			return (B_FALSE);
7541676Sjpk 		bcopy(co->cipso_doi, &doi, sizeof (doi));
7551676Sjpk 		doi = ntohl(doi);
7561676Sjpk 		if (!cipso_to_sl(opt_ptr, &sl))
7571676Sjpk 			return (B_FALSE);
7581676Sjpk 		setbltype(&sl, SUN_SL_ID);
759*10934Ssommerfeld@sun.com 
760*10934Ssommerfeld@sun.com 		/*
761*10934Ssommerfeld@sun.com 		 * If the source was unlabeled, then flag as such,
762*10934Ssommerfeld@sun.com 		 * (since CIPSO routers may add headers)
763*10934Ssommerfeld@sun.com 		 */
764*10934Ssommerfeld@sun.com 
765*10934Ssommerfeld@sun.com 		if ((src_rhtp = find_tpc(src, version, B_FALSE)) == NULL)
766*10934Ssommerfeld@sun.com 			return (B_FALSE);
767*10934Ssommerfeld@sun.com 
768*10934Ssommerfeld@sun.com 		if (src_rhtp->tpc_tp.host_type == UNLABELED)
769*10934Ssommerfeld@sun.com 			label_flags = TSLF_UNLABELED;
770*10934Ssommerfeld@sun.com 
771*10934Ssommerfeld@sun.com 		TPC_RELE(src_rhtp);
772*10934Ssommerfeld@sun.com 
7731676Sjpk 		break;
7741676Sjpk 
7751676Sjpk 	case OPT_NONE:
7761676Sjpk 		/*
777*10934Ssommerfeld@sun.com 		 * Handle special cases that may not be labeled, even
7781676Sjpk 		 * though the sending system may otherwise be configured as
7791676Sjpk 		 * labeled.
7801676Sjpk 		 *	- IGMP
7811676Sjpk 		 *	- IPv4 ICMP Router Discovery
7821676Sjpk 		 *	- IPv6 Neighbor Discovery
783*10934Ssommerfeld@sun.com 		 *	- IPsec ESP
7841676Sjpk 		 */
7851676Sjpk 		if (version == IPV4_VERSION) {
786*10934Ssommerfeld@sun.com 			proto = ipha->ipha_protocol;
787*10934Ssommerfeld@sun.com 			if (proto == IPPROTO_IGMP)
7881676Sjpk 				return (B_TRUE);
789*10934Ssommerfeld@sun.com 			if (proto == IPPROTO_ICMP) {
7901676Sjpk 				const struct icmp *icmp = (const struct icmp *)
7911676Sjpk 				    (mp->b_rptr + IPH_HDR_LENGTH(ipha));
7921676Sjpk 
79310181SKen.Powell@Sun.COM 				if ((uchar_t *)icmp + ICMP_MINLEN > mp->b_wptr)
79410181SKen.Powell@Sun.COM 					return (B_FALSE);
7951676Sjpk 				if (icmp->icmp_type == ICMP_ROUTERADVERT ||
7961676Sjpk 				    icmp->icmp_type == ICMP_ROUTERSOLICIT)
7971676Sjpk 					return (B_TRUE);
7981676Sjpk 			}
7991676Sjpk 		} else {
800*10934Ssommerfeld@sun.com 			proto = ip6h->ip6_nxt;
801*10934Ssommerfeld@sun.com 			if (proto == IPPROTO_ICMPV6) {
8021676Sjpk 				const icmp6_t *icmp6 = (const icmp6_t *)
8031676Sjpk 				    (mp->b_rptr + IPV6_HDR_LEN);
8041676Sjpk 
8051676Sjpk 				if ((uchar_t *)icmp6 + ICMP6_MINLEN >
80610181SKen.Powell@Sun.COM 				    mp->b_wptr)
80710181SKen.Powell@Sun.COM 					return (B_FALSE);
8081676Sjpk 				if (icmp6->icmp6_type >= MLD_LISTENER_QUERY &&
8091676Sjpk 				    icmp6->icmp6_type <= ICMP6_MAX_INFO_TYPE)
8101676Sjpk 					return (B_TRUE);
8111676Sjpk 			}
8121676Sjpk 		}
8131676Sjpk 
8141676Sjpk 		/*
8151676Sjpk 		 * Look up the tnrhtp database and get the implicit label
816*10934Ssommerfeld@sun.com 		 * that is associated with the sending host and attach
8171676Sjpk 		 * it to the packet.
8181676Sjpk 		 */
8191676Sjpk 		if ((src_rhtp = find_tpc(src, version, B_FALSE)) == NULL)
8201676Sjpk 			return (B_FALSE);
8211676Sjpk 
822*10934Ssommerfeld@sun.com 		/*
823*10934Ssommerfeld@sun.com 		 * If peer is label-aware, mark as "implicit" rather than
824*10934Ssommerfeld@sun.com 		 * "unlabeled" to cause appropriate mac-exempt processing
825*10934Ssommerfeld@sun.com 		 * to happen.
826*10934Ssommerfeld@sun.com 		 */
827*10934Ssommerfeld@sun.com 		if (src_rhtp->tpc_tp.host_type == SUN_CIPSO)
828*10934Ssommerfeld@sun.com 			label_flags = TSLF_IMPLICIT_IN;
829*10934Ssommerfeld@sun.com 		else if (src_rhtp->tpc_tp.host_type == UNLABELED)
830*10934Ssommerfeld@sun.com 			label_flags = TSLF_UNLABELED;
831*10934Ssommerfeld@sun.com 		else {
832*10934Ssommerfeld@sun.com 			DTRACE_PROBE2(tx__get__pkt__label, char *,
833*10934Ssommerfeld@sun.com 			    "template(1) has unknown hosttype",
834*10934Ssommerfeld@sun.com 			    tsol_tpc_t *, src_rhtp);
835*10934Ssommerfeld@sun.com 		}
836*10934Ssommerfeld@sun.com 
837*10934Ssommerfeld@sun.com 
838*10934Ssommerfeld@sun.com 		if (!tsol_find_unlabeled_label(src_rhtp, &sl, &doi)) {
8391676Sjpk 			TPC_RELE(src_rhtp);
8401676Sjpk 			return (B_FALSE);
8411676Sjpk 		}
8421676Sjpk 		TPC_RELE(src_rhtp);
8431676Sjpk 		break;
8441676Sjpk 
8451676Sjpk 	default:
8461676Sjpk 		return (B_FALSE);
8471676Sjpk 	}
8481676Sjpk 
8491676Sjpk 	/* Make sure no other thread is messing with this mblk */
8501676Sjpk 	ASSERT(DB_REF(mp) == 1);
8518778SErik.Nordmark@Sun.COM 	/* Preserve db_cpid */
8528778SErik.Nordmark@Sun.COM 	credp = msg_extractcred(mp, &cpid);
8538778SErik.Nordmark@Sun.COM 	if (credp == NULL) {
8548778SErik.Nordmark@Sun.COM 		credp = newcred_from_bslabel(&sl, doi, KM_NOSLEEP);
8551676Sjpk 	} else {
8561676Sjpk 		cred_t	*newcr;
8571676Sjpk 
8588778SErik.Nordmark@Sun.COM 		newcr = copycred_from_bslabel(credp, &sl, doi,
8591676Sjpk 		    KM_NOSLEEP);
8608778SErik.Nordmark@Sun.COM 		crfree(credp);
8618778SErik.Nordmark@Sun.COM 		credp = newcr;
8621676Sjpk 	}
8639041SErik.Nordmark@Sun.COM 	if (credp == NULL)
8649041SErik.Nordmark@Sun.COM 		return (B_FALSE);
865*10934Ssommerfeld@sun.com 
866*10934Ssommerfeld@sun.com 	crgetlabel(credp)->tsl_flags |= label_flags;
867*10934Ssommerfeld@sun.com 
8689041SErik.Nordmark@Sun.COM 	mblk_setcred(mp, credp, cpid);
8699041SErik.Nordmark@Sun.COM 	crfree(credp);			/* mblk has ref on cred */
8701676Sjpk 
8711676Sjpk 	return (B_TRUE);
8721676Sjpk }
8731676Sjpk 
8741676Sjpk /*
8751676Sjpk  * This routine determines whether the given packet should be accepted locally.
8761676Sjpk  * It does a range/set check on the packet's label by looking up the given
8771676Sjpk  * address in the remote host database.
8781676Sjpk  */
8791676Sjpk boolean_t
8801676Sjpk tsol_receive_local(const mblk_t *mp, const void *addr, uchar_t version,
8811676Sjpk     boolean_t shared_addr, const conn_t *connp)
8821676Sjpk {
8831676Sjpk 	const cred_t *credp;
8841676Sjpk 	ts_label_t *plabel, *conn_plabel;
8851676Sjpk 	tsol_tpc_t *tp;
8861676Sjpk 	boolean_t retv;
8871676Sjpk 	const bslabel_t *label, *conn_label;
8881676Sjpk 
8891676Sjpk 	/*
8901676Sjpk 	 * The cases in which this can happen are:
8911676Sjpk 	 *	- IPv6 Router Alert, where ip_rput_data_v6 deliberately skips
8921676Sjpk 	 *	  over the label attachment process.
8931676Sjpk 	 *	- MLD output looped-back to ourselves.
8941676Sjpk 	 *	- IPv4 Router Discovery, where tsol_get_pkt_label intentionally
8951676Sjpk 	 *	  avoids the labeling process.
8961676Sjpk 	 * We trust that all valid paths in the code set the cred pointer when
8971676Sjpk 	 * needed.
8981676Sjpk 	 */
8998778SErik.Nordmark@Sun.COM 	if ((credp = msg_getcred(mp, NULL)) == NULL)
9001676Sjpk 		return (B_TRUE);
9011676Sjpk 
9021676Sjpk 	/*
9031676Sjpk 	 * If this packet is from the inside (not a remote host) and has the
9041676Sjpk 	 * same zoneid as the selected destination, then no checks are
9051676Sjpk 	 * necessary.  Membership in the zone is enough proof.  This is
9061676Sjpk 	 * intended to be a hot path through this function.
9071676Sjpk 	 */
9081676Sjpk 	if (!crisremote(credp) &&
9091676Sjpk 	    crgetzone(credp) == crgetzone(connp->conn_cred))
9101676Sjpk 		return (B_TRUE);
9111676Sjpk 
9121676Sjpk 	plabel = crgetlabel(credp);
9131676Sjpk 	conn_plabel = crgetlabel(connp->conn_cred);
9141676Sjpk 	ASSERT(plabel != NULL && conn_plabel != NULL);
9151676Sjpk 
9161676Sjpk 	label = label2bslabel(plabel);
9171676Sjpk 	conn_label = label2bslabel(crgetlabel(connp->conn_cred));
9181676Sjpk 
919*10934Ssommerfeld@sun.com 
920*10934Ssommerfeld@sun.com 	/*
921*10934Ssommerfeld@sun.com 	 * Implicitly labeled packets from label-aware sources
922*10934Ssommerfeld@sun.com 	 * go only to privileged receivers
923*10934Ssommerfeld@sun.com 	 */
924*10934Ssommerfeld@sun.com 	if ((plabel->tsl_flags & TSLF_IMPLICIT_IN) &&
925*10934Ssommerfeld@sun.com 	    (connp->conn_mac_mode != CONN_MAC_IMPLICIT)) {
926*10934Ssommerfeld@sun.com 		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac_impl,
927*10934Ssommerfeld@sun.com 		    char *,
928*10934Ssommerfeld@sun.com 		    "implicitly labeled packet mp(1) for conn(2) "
929*10934Ssommerfeld@sun.com 		    "which isn't in implicit mac mode",
930*10934Ssommerfeld@sun.com 		    mblk_t *, mp, conn_t *, connp);
931*10934Ssommerfeld@sun.com 
932*10934Ssommerfeld@sun.com 		return (B_FALSE);
933*10934Ssommerfeld@sun.com 	}
934*10934Ssommerfeld@sun.com 
935*10934Ssommerfeld@sun.com 
9361676Sjpk 	/*
9371676Sjpk 	 * MLPs are always validated using the range and set of the local
9381676Sjpk 	 * address, even when the remote host is unlabeled.
9391676Sjpk 	 */
9401676Sjpk 	if (connp->conn_mlp_type == mlptBoth ||
9411676Sjpk 	/* LINTED: no consequent */
9421676Sjpk 	    connp->conn_mlp_type == (shared_addr ? mlptShared : mlptPrivate)) {
9431676Sjpk 		;
9441676Sjpk 
9451676Sjpk 	/*
9461676Sjpk 	 * If this is a packet from an unlabeled sender, then we must apply
9471676Sjpk 	 * different rules.  If the label is equal to the zone's label, then
9481676Sjpk 	 * it's allowed.  If it's not equal, but the zone is either the global
9491676Sjpk 	 * zone or the label is dominated by the zone's label, then allow it
9501676Sjpk 	 * as long as it's in the range configured for the destination.
9511676Sjpk 	 */
9521676Sjpk 	} else if (plabel->tsl_flags & TSLF_UNLABELED) {
9531676Sjpk 		if (plabel->tsl_doi == conn_plabel->tsl_doi &&
9541676Sjpk 		    blequal(label, conn_label))
9551676Sjpk 			return (B_TRUE);
9561676Sjpk 
9573448Sdh155122 		/*
9583448Sdh155122 		 * conn_zoneid is global for an exclusive stack, thus we use
9593448Sdh155122 		 * conn_cred to get the zoneid
9603448Sdh155122 		 */
961*10934Ssommerfeld@sun.com 		if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) ||
9623448Sdh155122 		    (crgetzoneid(connp->conn_cred) != GLOBAL_ZONEID &&
9631676Sjpk 		    (plabel->tsl_doi != conn_plabel->tsl_doi ||
9641676Sjpk 		    !bldominates(conn_label, label)))) {
9651676Sjpk 			DTRACE_PROBE3(
9661676Sjpk 			    tx__ip__log__drop__receivelocal__mac_unl,
9671676Sjpk 			    char *,
9681676Sjpk 			    "unlabeled packet mp(1) fails mac for conn(2)",
9691676Sjpk 			    mblk_t *, mp, conn_t *, connp);
9701676Sjpk 			return (B_FALSE);
9711676Sjpk 		}
9721676Sjpk 
9731676Sjpk 	/*
9744448Skp158701 	 * If this is a packet from a labeled sender, verify the
9754448Skp158701 	 * label on the packet matches the connection label.
9761676Sjpk 	 */
9774448Skp158701 	} else {
9784448Skp158701 		if (plabel->tsl_doi != conn_plabel->tsl_doi ||
9794448Skp158701 		    !blequal(label, conn_label)) {
9804448Skp158701 			DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac__slp,
9814448Skp158701 			    char *,
9824448Skp158701 			    "packet mp(1) failed label match to SLP conn(2)",
9834448Skp158701 			    mblk_t *, mp, conn_t *, connp);
9844448Skp158701 			return (B_FALSE);
9854448Skp158701 		}
9861676Sjpk 		/*
9874448Skp158701 		 * No further checks will be needed if this is a zone-
9884448Skp158701 		 * specific address because (1) The process for bringing up
9894448Skp158701 		 * the interface ensures the zone's label is within the zone-
9904448Skp158701 		 * specific address's valid label range; (2) For cases where
9914448Skp158701 		 * the conn is bound to the unspecified addresses, ip fanout
9924448Skp158701 		 * logic ensures conn's zoneid equals the dest addr's zoneid;
9934448Skp158701 		 * (3) Mac-exempt and mlp logic above already handle all
9944448Skp158701 		 * cases where the zone label may not be the same as the
9954448Skp158701 		 * conn label.
9961676Sjpk 		 */
9974448Skp158701 		if (!shared_addr)
9981676Sjpk 			return (B_TRUE);
9991676Sjpk 	}
10001676Sjpk 
10011676Sjpk 	tp = find_tpc(addr, version, B_FALSE);
10021676Sjpk 	if (tp == NULL) {
10031676Sjpk 		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__no__tnr,
10041676Sjpk 		    char *, "dropping mp(1), host(2) lacks entry",
10051676Sjpk 		    mblk_t *, mp, void *, addr);
10061676Sjpk 		return (B_FALSE);
10071676Sjpk 	}
10081676Sjpk 
10091676Sjpk 	/*
10101676Sjpk 	 * The local host address should not be unlabeled at this point.  The
10111676Sjpk 	 * only way this can happen is that the destination isn't unicast.  We
10121676Sjpk 	 * assume that the packet should not have had a label, and thus should
10131676Sjpk 	 * have been handled by the TSLF_UNLABELED logic above.
10141676Sjpk 	 */
10151676Sjpk 	if (tp->tpc_tp.host_type == UNLABELED) {
10161676Sjpk 		retv = B_FALSE;
10171676Sjpk 		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__flag, char *,
10181676Sjpk 		    "mp(1) unlabeled source, but tp is not unlabeled.",
10191676Sjpk 		    mblk_t *, mp, tsol_tpc_t *, tp);
10201676Sjpk 
10211676Sjpk 	} else if (tp->tpc_tp.host_type != SUN_CIPSO) {
10221676Sjpk 		retv = B_FALSE;
10231676Sjpk 		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__tptype, char *,
10241676Sjpk 		    "delivering mp(1), found unrecognized tpc(2) type.",
10251676Sjpk 		    mblk_t *, mp, tsol_tpc_t *, tp);
10261676Sjpk 
10271676Sjpk 	} else if (plabel->tsl_doi != tp->tpc_tp.tp_doi) {
10281676Sjpk 		retv = B_FALSE;
10291676Sjpk 		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac, char *,
10301676Sjpk 		    "mp(1) could not be delievered to tp(2), doi mismatch",
10311676Sjpk 		    mblk_t *, mp, tsol_tpc_t *, tp);
10321676Sjpk 
10331676Sjpk 	} else if (!_blinrange(label, &tp->tpc_tp.tp_sl_range_cipso) &&
10341676Sjpk 	    !blinlset(label, tp->tpc_tp.tp_sl_set_cipso)) {
10351676Sjpk 		retv = B_FALSE;
10361676Sjpk 		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac, char *,
10371676Sjpk 		    "mp(1) could not be delievered to tp(2), bad mac",
10381676Sjpk 		    mblk_t *, mp, tsol_tpc_t *, tp);
10391676Sjpk 	} else {
10401676Sjpk 		retv = B_TRUE;
10411676Sjpk 	}
10421676Sjpk 
10431676Sjpk 	TPC_RELE(tp);
10441676Sjpk 
10451676Sjpk 	return (retv);
10461676Sjpk }
10471676Sjpk 
10481676Sjpk boolean_t
10491676Sjpk tsol_can_accept_raw(mblk_t *mp, boolean_t check_host)
10501676Sjpk {
10511676Sjpk 	ts_label_t	*plabel = NULL;
10521676Sjpk 	tsol_tpc_t	*src_rhtp, *dst_rhtp;
10531676Sjpk 	boolean_t	retv;
10548778SErik.Nordmark@Sun.COM 	cred_t		*credp;
10551676Sjpk 
10568778SErik.Nordmark@Sun.COM 	credp = msg_getcred(mp, NULL);
10578778SErik.Nordmark@Sun.COM 	if (credp != NULL)
10588778SErik.Nordmark@Sun.COM 		plabel = crgetlabel(credp);
10591676Sjpk 
10601676Sjpk 	/* We are bootstrapping or the internal template was never deleted */
10611676Sjpk 	if (plabel == NULL)
10621676Sjpk 		return (B_TRUE);
10631676Sjpk 
10641676Sjpk 	if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) {
10651676Sjpk 		ipha_t *ipha = (ipha_t *)mp->b_rptr;
10661676Sjpk 
10671676Sjpk 		src_rhtp = find_tpc(&ipha->ipha_src, IPV4_VERSION,
10681676Sjpk 		    B_FALSE);
10691676Sjpk 		if (src_rhtp == NULL)
10701676Sjpk 			return (B_FALSE);
10711676Sjpk 		dst_rhtp = find_tpc(&ipha->ipha_dst, IPV4_VERSION,
10721676Sjpk 		    B_FALSE);
10731676Sjpk 	} else {
10741676Sjpk 		ip6_t *ip6h = (ip6_t *)mp->b_rptr;
10751676Sjpk 
10761676Sjpk 		src_rhtp = find_tpc(&ip6h->ip6_src, IPV6_VERSION,
10771676Sjpk 		    B_FALSE);
10781676Sjpk 		if (src_rhtp == NULL)
10791676Sjpk 			return (B_FALSE);
10801676Sjpk 		dst_rhtp = find_tpc(&ip6h->ip6_dst, IPV6_VERSION,
10811676Sjpk 		    B_FALSE);
10821676Sjpk 	}
10831676Sjpk 	if (dst_rhtp == NULL) {
10841676Sjpk 		TPC_RELE(src_rhtp);
10851676Sjpk 		return (B_FALSE);
10861676Sjpk 	}
10871676Sjpk 
10881676Sjpk 	if (label2doi(plabel) != src_rhtp->tpc_tp.tp_doi) {
10891676Sjpk 		retv = B_FALSE;
10901676Sjpk 
10911676Sjpk 	/*
10921676Sjpk 	 * Check that the packet's label is in the correct range for labeled
10931676Sjpk 	 * sender, or is equal to the default label for unlabeled sender.
10941676Sjpk 	 */
10951676Sjpk 	} else if ((src_rhtp->tpc_tp.host_type != UNLABELED &&
10961676Sjpk 	    !_blinrange(label2bslabel(plabel),
10971676Sjpk 	    &src_rhtp->tpc_tp.tp_sl_range_cipso) &&
10981676Sjpk 	    !blinlset(label2bslabel(plabel),
10991676Sjpk 	    src_rhtp->tpc_tp.tp_sl_set_cipso)) ||
11001676Sjpk 	    (src_rhtp->tpc_tp.host_type == UNLABELED &&
11011676Sjpk 	    !blequal(&plabel->tsl_label, &src_rhtp->tpc_tp.tp_def_label))) {
11021676Sjpk 		retv = B_FALSE;
11031676Sjpk 
11041676Sjpk 	} else if (check_host) {
11051676Sjpk 		retv = B_TRUE;
11061676Sjpk 
11071676Sjpk 	/*
11081676Sjpk 	 * Until we have SL range in the Zone structure, pass it
11091676Sjpk 	 * when our own address lookup returned an internal entry.
11101676Sjpk 	 */
11111676Sjpk 	} else switch (dst_rhtp->tpc_tp.host_type) {
11121676Sjpk 	case UNLABELED:
11131676Sjpk 		retv = B_TRUE;
11141676Sjpk 		break;
11151676Sjpk 
11161676Sjpk 	case SUN_CIPSO:
11171676Sjpk 		retv = _blinrange(label2bslabel(plabel),
11181676Sjpk 		    &dst_rhtp->tpc_tp.tp_sl_range_cipso) ||
11191676Sjpk 		    blinlset(label2bslabel(plabel),
11201676Sjpk 		    dst_rhtp->tpc_tp.tp_sl_set_cipso);
11211676Sjpk 		break;
11221676Sjpk 
11231676Sjpk 	default:
11241676Sjpk 		retv = B_FALSE;
11251676Sjpk 	}
11261676Sjpk 	TPC_RELE(src_rhtp);
11271676Sjpk 	TPC_RELE(dst_rhtp);
11281676Sjpk 	return (retv);
11291676Sjpk }
11301676Sjpk 
11311676Sjpk /*
11321676Sjpk  * This routine determines whether a response to a failed packet delivery or
11331676Sjpk  * connection should be sent back.  By default, the policy is to allow such
11341676Sjpk  * messages to be sent at all times, as these messages reveal little useful
11351676Sjpk  * information and are healthy parts of TCP/IP networking.
11361676Sjpk  *
11371676Sjpk  * If tsol_strict_error is set, then we do strict tests: if the packet label is
11381676Sjpk  * within the label range/set of this host/zone, return B_TRUE; otherwise
11391676Sjpk  * return B_FALSE, which causes the packet to be dropped silently.
11401676Sjpk  *
11411676Sjpk  * Note that tsol_get_pkt_label will cause the packet to drop if the sender is
11421676Sjpk  * marked as labeled in the remote host database, but the packet lacks a label.
11431676Sjpk  * This means that we don't need to do a lookup on the source; the
11441676Sjpk  * TSLF_UNLABELED flag is sufficient.
11451676Sjpk  */
11461676Sjpk boolean_t
11471676Sjpk tsol_can_reply_error(const mblk_t *mp)
11481676Sjpk {
11491676Sjpk 	ts_label_t	*plabel = NULL;
11501676Sjpk 	tsol_tpc_t	*rhtp;
11511676Sjpk 	const ipha_t	*ipha;
11521676Sjpk 	const ip6_t	*ip6h;
11531676Sjpk 	boolean_t	retv;
11541676Sjpk 	bslabel_t	*pktbs;
11558778SErik.Nordmark@Sun.COM 	cred_t		*credp;
11561676Sjpk 
11571676Sjpk 	/* Caller must pull up at least the IP header */
11581676Sjpk 	ASSERT(MBLKL(mp) >= (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION ?
11591676Sjpk 	    sizeof (*ipha) : sizeof (*ip6h)));
11601676Sjpk 
11611676Sjpk 	if (!tsol_strict_error)
11621676Sjpk 		return (B_TRUE);
11631676Sjpk 
11648778SErik.Nordmark@Sun.COM 	credp = msg_getcred(mp, NULL);
11658778SErik.Nordmark@Sun.COM 	if (credp != NULL)
11668778SErik.Nordmark@Sun.COM 		plabel = crgetlabel(credp);
11671676Sjpk 
11681676Sjpk 	/* We are bootstrapping or the internal template was never deleted */
11691676Sjpk 	if (plabel == NULL)
11701676Sjpk 		return (B_TRUE);
11711676Sjpk 
1172*10934Ssommerfeld@sun.com 	if (plabel->tsl_flags & TSLF_IMPLICIT_IN) {
1173*10934Ssommerfeld@sun.com 		DTRACE_PROBE3(tx__ip__log__drop__replyerror__unresolved__label,
1174*10934Ssommerfeld@sun.com 		    char *,
1175*10934Ssommerfeld@sun.com 		    "cannot send error report for packet mp(1) with "
1176*10934Ssommerfeld@sun.com 		    "unresolved security label sl(2)",
1177*10934Ssommerfeld@sun.com 		    mblk_t *, mp, ts_label_t *, plabel);
1178*10934Ssommerfeld@sun.com 		return (B_FALSE);
1179*10934Ssommerfeld@sun.com 	}
1180*10934Ssommerfeld@sun.com 
1181*10934Ssommerfeld@sun.com 
11821676Sjpk 	if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) {
11831676Sjpk 		ipha = (const ipha_t *)mp->b_rptr;
11841676Sjpk 		rhtp = find_tpc(&ipha->ipha_dst, IPV4_VERSION, B_FALSE);
11851676Sjpk 	} else {
11861676Sjpk 		ip6h = (const ip6_t *)mp->b_rptr;
11871676Sjpk 		rhtp = find_tpc(&ip6h->ip6_dst, IPV6_VERSION, B_FALSE);
11881676Sjpk 	}
11891676Sjpk 
11901676Sjpk 	if (rhtp == NULL || label2doi(plabel) != rhtp->tpc_tp.tp_doi) {
11911676Sjpk 		retv = B_FALSE;
11921676Sjpk 	} else {
11931676Sjpk 		/*
11941676Sjpk 		 * If we're in the midst of forwarding, then the destination
11951676Sjpk 		 * address might not be labeled.  In that case, allow unlabeled
11961676Sjpk 		 * packets through only if the default label is the same, and
11971676Sjpk 		 * labeled ones if they dominate.
11981676Sjpk 		 */
11991676Sjpk 		pktbs = label2bslabel(plabel);
12001676Sjpk 		switch (rhtp->tpc_tp.host_type) {
12011676Sjpk 		case UNLABELED:
12021676Sjpk 			if (plabel->tsl_flags & TSLF_UNLABELED) {
12031676Sjpk 				retv = blequal(pktbs,
12041676Sjpk 				    &rhtp->tpc_tp.tp_def_label);
12051676Sjpk 			} else {
12061676Sjpk 				retv = bldominates(pktbs,
12071676Sjpk 				    &rhtp->tpc_tp.tp_def_label);
12081676Sjpk 			}
12091676Sjpk 			break;
12101676Sjpk 
12111676Sjpk 		case SUN_CIPSO:
12121676Sjpk 			retv = _blinrange(pktbs,
12131676Sjpk 			    &rhtp->tpc_tp.tp_sl_range_cipso) ||
12141676Sjpk 			    blinlset(pktbs, rhtp->tpc_tp.tp_sl_set_cipso);
12151676Sjpk 			break;
12161676Sjpk 
12171676Sjpk 		default:
12181676Sjpk 			retv = B_FALSE;
12191676Sjpk 			break;
12201676Sjpk 		}
12211676Sjpk 	}
12221676Sjpk 
12231676Sjpk 	if (rhtp != NULL)
12241676Sjpk 		TPC_RELE(rhtp);
12251676Sjpk 
12261676Sjpk 	return (retv);
12271676Sjpk }
12281676Sjpk 
12291676Sjpk /*
12301676Sjpk  * Finds the zone associated with the given packet.  Returns GLOBAL_ZONEID if
12311676Sjpk  * the zone cannot be located.
12321676Sjpk  *
12331676Sjpk  * This is used by the classifier when the packet matches an ALL_ZONES IRE, and
12341676Sjpk  * there's no MLP defined.
12353448Sdh155122  *
12363448Sdh155122  * Note that we assume that this is only invoked in the ALL_ZONES case.
12373448Sdh155122  * Handling other cases would require handle exclusive stack zones where either
12383448Sdh155122  * this routine or the callers would have to map from
12393448Sdh155122  * the zoneid (zone->zone_id) to what IP uses in conn_zoneid etc.
12401676Sjpk  */
12411676Sjpk zoneid_t
12421676Sjpk tsol_packet_to_zoneid(const mblk_t *mp)
12431676Sjpk {
12448778SErik.Nordmark@Sun.COM 	cred_t *cr = msg_getcred(mp, NULL);
12451676Sjpk 	zone_t *zone;
12461676Sjpk 	ts_label_t *label;
12471676Sjpk 
12481676Sjpk 	if (cr != NULL) {
12491676Sjpk 		if ((label = crgetlabel(cr)) != NULL) {
12501676Sjpk 			zone = zone_find_by_label(label);
12511676Sjpk 			if (zone != NULL) {
12521676Sjpk 				zoneid_t zoneid = zone->zone_id;
12531676Sjpk 
12541676Sjpk 				zone_rele(zone);
12551676Sjpk 				return (zoneid);
12561676Sjpk 			}
12571676Sjpk 		}
12581676Sjpk 	}
12591676Sjpk 	return (GLOBAL_ZONEID);
12601676Sjpk }
12611676Sjpk 
12621676Sjpk int
12631676Sjpk tsol_ire_match_gwattr(ire_t *ire, const ts_label_t *tsl)
12641676Sjpk {
12651676Sjpk 	int		error = 0;
12661676Sjpk 	tsol_ire_gw_secattr_t *attrp = NULL;
12671676Sjpk 	tsol_tnrhc_t	*gw_rhc = NULL;
12681676Sjpk 	tsol_gcgrp_t	*gcgrp = NULL;
12691676Sjpk 	tsol_gc_t	*gc = NULL;
12701676Sjpk 	in_addr_t	ga_addr4;
12711676Sjpk 	void		*paddr = NULL;
12721676Sjpk 
12731676Sjpk 	/* Not in Trusted mode or IRE is local/loopback/broadcast/interface */
12741676Sjpk 	if (!is_system_labeled() ||
12751676Sjpk 	    (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST |
12761676Sjpk 	    IRE_INTERFACE)))
12771676Sjpk 		goto done;
12781676Sjpk 
12791676Sjpk 	/*
12801676Sjpk 	 * If we don't have a label to compare with, or the IRE does not
12811676Sjpk 	 * contain any gateway security attributes, there's not much that
12821676Sjpk 	 * we can do.  We let the former case pass, and the latter fail,
12831676Sjpk 	 * since the IRE doesn't qualify for a match due to the lack of
12841676Sjpk 	 * security attributes.
12851676Sjpk 	 */
12861676Sjpk 	if (tsl == NULL || ire->ire_gw_secattr == NULL) {
12871676Sjpk 		if (tsl != NULL) {
1288*10934Ssommerfeld@sun.com 			DTRACE_PROBE3(
1289*10934Ssommerfeld@sun.com 			    tx__ip__log__drop__irematch__nogwsec, char *,
1290*10934Ssommerfeld@sun.com 			    "ire(1) lacks ire_gw_secattr when matching "
1291*10934Ssommerfeld@sun.com 			    "label(2)", ire_t *, ire, ts_label_t *, tsl);
12921676Sjpk 			error = EACCES;
12931676Sjpk 		}
12941676Sjpk 		goto done;
12951676Sjpk 	}
12961676Sjpk 
12971676Sjpk 	attrp = ire->ire_gw_secattr;
12981676Sjpk 
12991676Sjpk 	/*
13001676Sjpk 	 * The possible lock order scenarios related to the tsol gateway
13011676Sjpk 	 * attribute locks are documented at the beginning of ip.c in the
13021676Sjpk 	 * lock order scenario section.
13031676Sjpk 	 */
13041676Sjpk 	mutex_enter(&attrp->igsa_lock);
13051676Sjpk 
13061676Sjpk 	/*
13071676Sjpk 	 * Depending on the IRE type (prefix vs. cache), we seek the group
13081676Sjpk 	 * structure which contains all security credentials of the gateway.
13091676Sjpk 	 * A prefix IRE is associated with at most one gateway credential,
13101676Sjpk 	 * while a cache IRE is associated with every credentials that the
13111676Sjpk 	 * gateway has.
13121676Sjpk 	 */
13131676Sjpk 	if ((gc = attrp->igsa_gc) != NULL) {			/* prefix */
13141676Sjpk 		gcgrp = gc->gc_grp;
13151676Sjpk 		ASSERT(gcgrp != NULL);
13161676Sjpk 		rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
13171676Sjpk 	} else if ((gcgrp = attrp->igsa_gcgrp) != NULL) {	/* cache */
13181676Sjpk 		rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
13191676Sjpk 		gc = gcgrp->gcgrp_head;
13201676Sjpk 		if (gc == NULL) {
13211676Sjpk 			/* gc group is empty, so the drop lock now */
13221676Sjpk 			ASSERT(gcgrp->gcgrp_count == 0);
13231676Sjpk 			rw_exit(&gcgrp->gcgrp_rwlock);
13241676Sjpk 			gcgrp = NULL;
13251676Sjpk 		}
13261676Sjpk 	}
13271676Sjpk 
13281676Sjpk 	if (gcgrp != NULL)
13291676Sjpk 		GCGRP_REFHOLD(gcgrp);
13301676Sjpk 
13311676Sjpk 	if ((gw_rhc = attrp->igsa_rhc) != NULL) {
13321676Sjpk 		/*
13331676Sjpk 		 * If our cached entry has grown stale, then discard it so we
13341676Sjpk 		 * can get a new one.
13351676Sjpk 		 */
13361676Sjpk 		if (gw_rhc->rhc_invalid || gw_rhc->rhc_tpc->tpc_invalid) {
13371676Sjpk 			TNRHC_RELE(gw_rhc);
13381676Sjpk 			attrp->igsa_rhc = gw_rhc = NULL;
13391676Sjpk 		} else {
13401676Sjpk 			TNRHC_HOLD(gw_rhc)
13411676Sjpk 		}
13421676Sjpk 	}
13431676Sjpk 
13441676Sjpk 	/* Last attempt at loading the template had failed; try again */
13451676Sjpk 	if (gw_rhc == NULL) {
13461676Sjpk 		if (gcgrp != NULL) {
13471676Sjpk 			tsol_gcgrp_addr_t *ga = &gcgrp->gcgrp_addr;
13481676Sjpk 
13491676Sjpk 			if (ire->ire_ipversion == IPV4_VERSION) {
13501676Sjpk 				ASSERT(ga->ga_af == AF_INET);
13511676Sjpk 				IN6_V4MAPPED_TO_IPADDR(&ga->ga_addr, ga_addr4);
13521676Sjpk 				paddr = &ga_addr4;
13531676Sjpk 			} else {
13541676Sjpk 				ASSERT(ga->ga_af == AF_INET6);
13551676Sjpk 				paddr = &ga->ga_addr;
13561676Sjpk 			}
13571676Sjpk 		} else if (ire->ire_ipversion == IPV6_VERSION &&
13581676Sjpk 		    !IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6)) {
13591676Sjpk 			paddr = &ire->ire_gateway_addr_v6;
13601676Sjpk 		} else if (ire->ire_ipversion == IPV4_VERSION &&
13611676Sjpk 		    ire->ire_gateway_addr != INADDR_ANY) {
13621676Sjpk 			paddr = &ire->ire_gateway_addr;
13631676Sjpk 		}
13641676Sjpk 
13651676Sjpk 		/* We've found a gateway address to do the template lookup */
13661676Sjpk 		if (paddr != NULL) {
13671676Sjpk 			ASSERT(gw_rhc == NULL);
13683292Skp158701 			gw_rhc = find_rhc(paddr, ire->ire_ipversion, B_FALSE);
13691676Sjpk 			if (gw_rhc != NULL) {
13701676Sjpk 				/*
13711676Sjpk 				 * Note that if the lookup above returned an
13721676Sjpk 				 * internal template, we'll use it for the
13731676Sjpk 				 * time being, and do another lookup next
13741676Sjpk 				 * time around.
13751676Sjpk 				 */
13761676Sjpk 				/* Another thread has loaded the template? */
13771676Sjpk 				if (attrp->igsa_rhc != NULL) {
13781676Sjpk 					TNRHC_RELE(gw_rhc)
13791676Sjpk 					/* reload, it could be different */
13801676Sjpk 					gw_rhc = attrp->igsa_rhc;
13811676Sjpk 				} else {
13821676Sjpk 					attrp->igsa_rhc = gw_rhc;
13831676Sjpk 				}
13841676Sjpk 				/*
13851676Sjpk 				 * Hold an extra reference just like we did
13861676Sjpk 				 * above prior to dropping the igsa_lock.
13871676Sjpk 				 */
13881676Sjpk 				TNRHC_HOLD(gw_rhc)
13891676Sjpk 			}
13901676Sjpk 		}
13911676Sjpk 	}
13921676Sjpk 
13931676Sjpk 	mutex_exit(&attrp->igsa_lock);
13941676Sjpk 	/* Gateway template not found */
13951676Sjpk 	if (gw_rhc == NULL) {
13961676Sjpk 		/*
13971676Sjpk 		 * If destination address is directly reachable through an
13981676Sjpk 		 * interface rather than through a learned route, pass it.
13991676Sjpk 		 */
14001676Sjpk 		if (paddr != NULL) {
14011676Sjpk 			DTRACE_PROBE3(
14021676Sjpk 			    tx__ip__log__drop__irematch__nogwtmpl, char *,
14031676Sjpk 			    "ire(1), label(2) off-link with no gw_rhc",
14041676Sjpk 			    ire_t *, ire, ts_label_t *, tsl);
14051676Sjpk 			error = EINVAL;
14061676Sjpk 		}
14071676Sjpk 		goto done;
14081676Sjpk 	}
14091676Sjpk 
14101676Sjpk 	if (gc != NULL) {
14111676Sjpk 		tsol_gcdb_t *gcdb;
14121676Sjpk 		/*
14131676Sjpk 		 * In the case of IRE_CACHE we've got one or more gateway
14141676Sjpk 		 * security credentials to compare against the passed in label.
14151676Sjpk 		 * Perform label range comparison against each security
14161676Sjpk 		 * credential of the gateway. In the case of a prefix ire
14171676Sjpk 		 * we need to match against the security attributes of
14181676Sjpk 		 * just the route itself, so the loop is executed only once.
14191676Sjpk 		 */
14201676Sjpk 		ASSERT(gcgrp != NULL);
14211676Sjpk 		do {
14221676Sjpk 			gcdb = gc->gc_db;
14231676Sjpk 			if (tsl->tsl_doi == gcdb->gcdb_doi &&
14241676Sjpk 			    _blinrange(&tsl->tsl_label, &gcdb->gcdb_slrange))
14251676Sjpk 				break;
14261676Sjpk 			if (ire->ire_type == IRE_CACHE)
14271676Sjpk 				gc = gc->gc_next;
14281676Sjpk 			else
14291676Sjpk 				gc = NULL;
14301676Sjpk 		} while (gc != NULL);
14311676Sjpk 
14321676Sjpk 		if (gc == NULL) {
14331676Sjpk 			DTRACE_PROBE3(
14341676Sjpk 			    tx__ip__log__drop__irematch__nogcmatched,
14351676Sjpk 			    char *, "ire(1), tsl(2): all gc failed match",
14361676Sjpk 			    ire_t *, ire, ts_label_t *, tsl);
14371676Sjpk 			error = EACCES;
14381676Sjpk 		}
14391676Sjpk 	} else {
14401676Sjpk 		/*
14411676Sjpk 		 * We didn't find any gateway credentials in the IRE
14421676Sjpk 		 * attributes; fall back to the gateway's template for
14431676Sjpk 		 * label range checks, if we are required to do so.
14441676Sjpk 		 */
14451676Sjpk 		ASSERT(gw_rhc != NULL);
14461676Sjpk 		switch (gw_rhc->rhc_tpc->tpc_tp.host_type) {
14471676Sjpk 		case SUN_CIPSO:
14486596Skp158701 			if (tsl->tsl_doi != gw_rhc->rhc_tpc->tpc_tp.tp_doi ||
14491676Sjpk 			    (!_blinrange(&tsl->tsl_label,
14506596Skp158701 			    &gw_rhc->rhc_tpc->tpc_tp.tp_sl_range_cipso) &&
14511676Sjpk 			    !blinlset(&tsl->tsl_label,
14521676Sjpk 			    gw_rhc->rhc_tpc->tpc_tp.tp_sl_set_cipso))) {
14531676Sjpk 				error = EACCES;
14541676Sjpk 				DTRACE_PROBE4(
14551676Sjpk 				    tx__ip__log__drop__irematch__deftmpl,
14561676Sjpk 				    char *, "ire(1), tsl(2), gw_rhc(3) "
14571676Sjpk 				    "failed match (cipso gw)",
14581676Sjpk 				    ire_t *, ire, ts_label_t *, tsl,
14591676Sjpk 				    tsol_tnrhc_t *, gw_rhc);
14601676Sjpk 			}
14611676Sjpk 			break;
14621676Sjpk 
14631676Sjpk 		case UNLABELED:
14646596Skp158701 			if (tsl->tsl_doi != gw_rhc->rhc_tpc->tpc_tp.tp_doi ||
14651676Sjpk 			    (!_blinrange(&tsl->tsl_label,
14661676Sjpk 			    &gw_rhc->rhc_tpc->tpc_tp.tp_gw_sl_range) &&
14671676Sjpk 			    !blinlset(&tsl->tsl_label,
14681676Sjpk 			    gw_rhc->rhc_tpc->tpc_tp.tp_gw_sl_set))) {
14691676Sjpk 				error = EACCES;
14701676Sjpk 				DTRACE_PROBE4(
14711676Sjpk 				    tx__ip__log__drop__irematch__deftmpl,
14721676Sjpk 				    char *, "ire(1), tsl(2), gw_rhc(3) "
14731676Sjpk 				    "failed match (unlabeled gw)",
14741676Sjpk 				    ire_t *, ire, ts_label_t *, tsl,
14751676Sjpk 				    tsol_tnrhc_t *, gw_rhc);
14761676Sjpk 			}
14771676Sjpk 			break;
14781676Sjpk 		}
14791676Sjpk 	}
14801676Sjpk 
14811676Sjpk done:
14821676Sjpk 
14831676Sjpk 	if (gcgrp != NULL) {
14841676Sjpk 		rw_exit(&gcgrp->gcgrp_rwlock);
14851676Sjpk 		GCGRP_REFRELE(gcgrp);
14861676Sjpk 	}
14871676Sjpk 
14881676Sjpk 	if (gw_rhc != NULL)
14891676Sjpk 		TNRHC_RELE(gw_rhc)
14901676Sjpk 
14911676Sjpk 	return (error);
14921676Sjpk }
14931676Sjpk 
14941676Sjpk /*
14951676Sjpk  * Performs label accreditation checks for packet forwarding.
14961676Sjpk  *
14971676Sjpk  * Returns a pointer to the modified mblk if allowed for forwarding,
14981676Sjpk  * or NULL if the packet must be dropped.
14991676Sjpk  */
15001676Sjpk mblk_t *
15011676Sjpk tsol_ip_forward(ire_t *ire, mblk_t *mp)
15021676Sjpk {
15031676Sjpk 	tsol_ire_gw_secattr_t *attrp = NULL;
15041676Sjpk 	ipha_t		*ipha;
15051676Sjpk 	ip6_t		*ip6h;
15061676Sjpk 	const void	*pdst;
15071676Sjpk 	const void	*psrc;
15081676Sjpk 	boolean_t	off_link;
15091676Sjpk 	tsol_tpc_t	*dst_rhtp, *gw_rhtp;
15101676Sjpk 	tsol_ip_label_t label_type;
15111676Sjpk 	uchar_t		*opt_ptr = NULL;
15121676Sjpk 	ts_label_t	*tsl;
15131676Sjpk 	uint8_t		proto;
15141676Sjpk 	int		af, adjust;
15151676Sjpk 	uint16_t	iplen;
15162535Ssangeeta 	boolean_t	need_tpc_rele = B_FALSE;
15172535Ssangeeta 	ipaddr_t	*gw;
15183448Sdh155122 	ip_stack_t	*ipst = ire->ire_ipst;
15198778SErik.Nordmark@Sun.COM 	cred_t		*credp;
15209710SKen.Powell@Sun.COM 	pid_t		pid;
15211676Sjpk 
15221676Sjpk 	ASSERT(ire != NULL && mp != NULL);
15231676Sjpk 	ASSERT(ire->ire_stq != NULL);
15241676Sjpk 
15251676Sjpk 	af = (ire->ire_ipversion == IPV4_VERSION) ? AF_INET : AF_INET6;
15261676Sjpk 
15271676Sjpk 	if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) {
15281676Sjpk 		ASSERT(ire->ire_ipversion == IPV4_VERSION);
15291676Sjpk 		ipha = (ipha_t *)mp->b_rptr;
15301676Sjpk 		psrc = &ipha->ipha_src;
15311676Sjpk 		pdst = &ipha->ipha_dst;
15321676Sjpk 		proto = ipha->ipha_protocol;
15331676Sjpk 
15342535Ssangeeta 		/*
15352535Ssangeeta 		 * off_link is TRUE if destination not directly reachable.
15362535Ssangeeta 		 * Surya note: we avoid creation of per-dst IRE_CACHE entries
15372535Ssangeeta 		 * for forwarded packets, so we set off_link to be TRUE
15382535Ssangeeta 		 * if the packet dst is different from the ire_addr of
15392535Ssangeeta 		 * the ire for the nexthop.
15402535Ssangeeta 		 */
15412535Ssangeeta 		off_link = ((ipha->ipha_dst != ire->ire_addr) ||
15422535Ssangeeta 		    (ire->ire_gateway_addr != INADDR_ANY));
154310181SKen.Powell@Sun.COM 		if (!tsol_get_option_v4(mp, &label_type, &opt_ptr))
154410181SKen.Powell@Sun.COM 			return (NULL);
15451676Sjpk 	} else {
15461676Sjpk 		ASSERT(ire->ire_ipversion == IPV6_VERSION);
15471676Sjpk 		ip6h = (ip6_t *)mp->b_rptr;
15481676Sjpk 		psrc = &ip6h->ip6_src;
15491676Sjpk 		pdst = &ip6h->ip6_dst;
15501676Sjpk 		proto = ip6h->ip6_nxt;
15511676Sjpk 
15521676Sjpk 		if (proto != IPPROTO_TCP && proto != IPPROTO_UDP &&
15531676Sjpk 		    proto != IPPROTO_ICMPV6) {
15541676Sjpk 			uint8_t *nexthdrp;
15551676Sjpk 			uint16_t hdr_len;
15561676Sjpk 
15571676Sjpk 			if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len,
15581676Sjpk 			    &nexthdrp)) {
15591676Sjpk 				/* malformed packet; drop it */
15601676Sjpk 				return (NULL);
15611676Sjpk 			}
15621676Sjpk 			proto = *nexthdrp;
15631676Sjpk 		}
15641676Sjpk 
15651676Sjpk 		/* destination not directly reachable? */
15661676Sjpk 		off_link = !IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6);
156710181SKen.Powell@Sun.COM 		if (!tsol_get_option_v6(mp, &label_type, &opt_ptr))
156810181SKen.Powell@Sun.COM 			return (NULL);
15691676Sjpk 	}
15701676Sjpk 
15718778SErik.Nordmark@Sun.COM 	if ((tsl = msg_getlabel(mp)) == NULL)
15721676Sjpk 		return (mp);
15731676Sjpk 
1574*10934Ssommerfeld@sun.com 	if (tsl->tsl_flags & TSLF_IMPLICIT_IN) {
1575*10934Ssommerfeld@sun.com 		DTRACE_PROBE3(tx__ip__log__drop__forward__unresolved__label,
1576*10934Ssommerfeld@sun.com 		    char *,
1577*10934Ssommerfeld@sun.com 		    "cannot forward packet mp(1) with unresolved "
1578*10934Ssommerfeld@sun.com 		    "security label sl(2)",
1579*10934Ssommerfeld@sun.com 		    mblk_t *, mp, ts_label_t *, tsl);
1580*10934Ssommerfeld@sun.com 
1581*10934Ssommerfeld@sun.com 		return (NULL);
1582*10934Ssommerfeld@sun.com 	}
1583*10934Ssommerfeld@sun.com 
1584*10934Ssommerfeld@sun.com 
15851676Sjpk 	ASSERT(psrc != NULL && pdst != NULL);
15861676Sjpk 	dst_rhtp = find_tpc(pdst, ire->ire_ipversion, B_FALSE);
15871676Sjpk 
15881676Sjpk 	if (dst_rhtp == NULL) {
15891676Sjpk 		/*
15901676Sjpk 		 * Without a template we do not know if forwarding
15911676Sjpk 		 * violates MAC
15921676Sjpk 		 */
15931676Sjpk 		DTRACE_PROBE3(tx__ip__log__drop__forward__nodst, char *,
15941676Sjpk 		    "mp(1) dropped, no template for destination ip4|6(2)",
15951676Sjpk 		    mblk_t *, mp, void *, pdst);
15961676Sjpk 		return (NULL);
15971676Sjpk 	}
15981676Sjpk 
15991676Sjpk 	/*
16001676Sjpk 	 * Gateway template must have existed for off-link destinations,
16011676Sjpk 	 * since tsol_ire_match_gwattr has ensured such condition.
16021676Sjpk 	 */
16032535Ssangeeta 	if (ire->ire_ipversion == IPV4_VERSION && off_link) {
16042535Ssangeeta 		/*
16052535Ssangeeta 		 * Surya note: first check if we can get the gw_rhtp from
16062535Ssangeeta 		 * the ire_gw_secattr->igsa_rhc; if this is null, then
16072535Ssangeeta 		 * do a lookup based on the ire_addr (address of gw)
16082535Ssangeeta 		 */
16092535Ssangeeta 		if (ire->ire_gw_secattr != NULL &&
16102535Ssangeeta 		    ire->ire_gw_secattr->igsa_rhc != NULL) {
16112535Ssangeeta 			attrp = ire->ire_gw_secattr;
16122535Ssangeeta 			gw_rhtp = attrp->igsa_rhc->rhc_tpc;
16132535Ssangeeta 		} else  {
16142535Ssangeeta 			/*
16152535Ssangeeta 			 * use the ire_addr if this is the IRE_CACHE of nexthop
16162535Ssangeeta 			 */
16172535Ssangeeta 			gw = (ire->ire_gateway_addr == NULL? &ire->ire_addr :
16182535Ssangeeta 			    &ire->ire_gateway_addr);
16192535Ssangeeta 			gw_rhtp = find_tpc(gw, ire->ire_ipversion, B_FALSE);
16202535Ssangeeta 			need_tpc_rele = B_TRUE;
16212535Ssangeeta 		}
16222535Ssangeeta 		if (gw_rhtp == NULL) {
16232535Ssangeeta 			DTRACE_PROBE3(tx__ip__log__drop__forward__nogw, char *,
16242535Ssangeeta 			    "mp(1) dropped, no gateway in ire attributes(2)",
16252535Ssangeeta 			    mblk_t *, mp, tsol_ire_gw_secattr_t *, attrp);
16262535Ssangeeta 			mp = NULL;
16272535Ssangeeta 			goto keep_label;
16282535Ssangeeta 		}
16292535Ssangeeta 	}
16302535Ssangeeta 	if (ire->ire_ipversion == IPV6_VERSION &&
16312535Ssangeeta 	    ((attrp = ire->ire_gw_secattr) == NULL || attrp->igsa_rhc == NULL ||
16321676Sjpk 	    (gw_rhtp = attrp->igsa_rhc->rhc_tpc) == NULL) && off_link) {
16331676Sjpk 		DTRACE_PROBE3(tx__ip__log__drop__forward__nogw, char *,
16341676Sjpk 		    "mp(1) dropped, no gateway in ire attributes(2)",
16351676Sjpk 		    mblk_t *, mp, tsol_ire_gw_secattr_t *, attrp);
16361676Sjpk 		mp = NULL;
16371676Sjpk 		goto keep_label;
16381676Sjpk 	}
16391676Sjpk 
16401676Sjpk 	/*
16411676Sjpk 	 * Check that the label for the packet is acceptable
16421676Sjpk 	 * by destination host; otherwise, drop it.
16431676Sjpk 	 */
16441676Sjpk 	switch (dst_rhtp->tpc_tp.host_type) {
16451676Sjpk 	case SUN_CIPSO:
16461676Sjpk 		if (tsl->tsl_doi != dst_rhtp->tpc_tp.tp_doi ||
16471676Sjpk 		    (!_blinrange(&tsl->tsl_label,
16481676Sjpk 		    &dst_rhtp->tpc_tp.tp_sl_range_cipso) &&
16491676Sjpk 		    !blinlset(&tsl->tsl_label,
16501676Sjpk 		    dst_rhtp->tpc_tp.tp_sl_set_cipso))) {
16511676Sjpk 			DTRACE_PROBE4(tx__ip__log__drop__forward__mac, char *,
16521676Sjpk 			    "labeled packet mp(1) dropped, label(2) fails "
16531676Sjpk 			    "destination(3) accredation check",
16541676Sjpk 			    mblk_t *, mp, ts_label_t *, tsl,
16551676Sjpk 			    tsol_tpc_t *, dst_rhtp);
16561676Sjpk 			mp = NULL;
16571676Sjpk 			goto keep_label;
16581676Sjpk 		}
16591676Sjpk 		break;
16601676Sjpk 
16611676Sjpk 
16621676Sjpk 	case UNLABELED:
16631676Sjpk 		if (tsl->tsl_doi != dst_rhtp->tpc_tp.tp_doi ||
16641676Sjpk 		    !blequal(&dst_rhtp->tpc_tp.tp_def_label,
16651676Sjpk 		    &tsl->tsl_label)) {
16661676Sjpk 			DTRACE_PROBE4(tx__ip__log__drop__forward__mac, char *,
16671676Sjpk 			    "unlabeled packet mp(1) dropped, label(2) fails "
16681676Sjpk 			    "destination(3) accredation check",
16691676Sjpk 			    mblk_t *, mp, ts_label_t *, tsl,
16701676Sjpk 			    tsol_tpc_t *, dst_rhtp);
16711676Sjpk 			mp = NULL;
16721676Sjpk 			goto keep_label;
16731676Sjpk 		}
16741676Sjpk 		break;
16751676Sjpk 	}
16761676Sjpk 	if (label_type == OPT_CIPSO) {
16771676Sjpk 		/*
16781676Sjpk 		 * We keep the label on any of the following cases:
16791676Sjpk 		 *
16801676Sjpk 		 *   1. The destination is labeled (on/off-link).
16811676Sjpk 		 *   2. The unlabeled destination is off-link,
16821676Sjpk 		 *	and the next hop gateway is labeled.
16831676Sjpk 		 */
16841676Sjpk 		if (dst_rhtp->tpc_tp.host_type != UNLABELED ||
16851676Sjpk 		    (off_link &&
16861676Sjpk 		    gw_rhtp->tpc_tp.host_type != UNLABELED))
16871676Sjpk 			goto keep_label;
16881676Sjpk 
16891676Sjpk 		/*
16901676Sjpk 		 * Strip off the CIPSO option from the packet because: the
16911676Sjpk 		 * unlabeled destination host is directly reachable through
16921676Sjpk 		 * an interface (on-link); or, the unlabeled destination host
16931676Sjpk 		 * is not directly reachable (off-link), and the next hop
16941676Sjpk 		 * gateway is unlabeled.
16951676Sjpk 		 */
16961676Sjpk 		adjust = (af == AF_INET) ? tsol_remove_secopt(ipha, MBLKL(mp)) :
16971676Sjpk 		    tsol_remove_secopt_v6(ip6h, MBLKL(mp));
16981676Sjpk 
16991676Sjpk 		ASSERT(adjust <= 0);
17001676Sjpk 		if (adjust != 0) {
17011676Sjpk 
17021676Sjpk 			/* adjust is negative */
17031676Sjpk 			ASSERT((mp->b_wptr + adjust) >= mp->b_rptr);
17041676Sjpk 			mp->b_wptr += adjust;
17051676Sjpk 
17061676Sjpk 			if (af == AF_INET) {
17071676Sjpk 				ipha = (ipha_t *)mp->b_rptr;
17081676Sjpk 				iplen = ntohs(ipha->ipha_length) + adjust;
17091676Sjpk 				ipha->ipha_length = htons(iplen);
17101676Sjpk 				ipha->ipha_hdr_checksum = 0;
17111676Sjpk 				ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
17121676Sjpk 			}
17131676Sjpk 			DTRACE_PROBE3(tx__ip__log__info__forward__adjust,
17141676Sjpk 			    char *,
17151676Sjpk 			    "mp(1) adjusted(2) for CIPSO option removal",
17161676Sjpk 			    mblk_t *, mp, int, adjust);
17171676Sjpk 		}
17181676Sjpk 		goto keep_label;
17191676Sjpk 	}
17201676Sjpk 
17211676Sjpk 	ASSERT(label_type == OPT_NONE);
17221676Sjpk 	ASSERT(dst_rhtp != NULL);
17231676Sjpk 
17241676Sjpk 	/*
17251676Sjpk 	 * We need to add CIPSO option if the destination or the next hop
17261676Sjpk 	 * gateway is labeled.  Otherwise, pass the packet as is.
17271676Sjpk 	 */
17281676Sjpk 	if (dst_rhtp->tpc_tp.host_type == UNLABELED &&
17291676Sjpk 	    (!off_link || gw_rhtp->tpc_tp.host_type == UNLABELED))
17301676Sjpk 		goto keep_label;
17311676Sjpk 
17328778SErik.Nordmark@Sun.COM 
17339710SKen.Powell@Sun.COM 	credp = msg_getcred(mp, &pid);
17341676Sjpk 	if ((af == AF_INET &&
1735*10934Ssommerfeld@sun.com 	    tsol_check_label(credp, &mp, CONN_MAC_DEFAULT, ipst, pid) != 0) ||
17361676Sjpk 	    (af == AF_INET6 &&
1737*10934Ssommerfeld@sun.com 	    tsol_check_label_v6(credp, &mp, CONN_MAC_DEFAULT, ipst,
1738*10934Ssommerfeld@sun.com 	    pid) != 0)) {
17391676Sjpk 		mp = NULL;
17401676Sjpk 		goto keep_label;
17411676Sjpk 	}
17421676Sjpk 
17436596Skp158701 	if (af == AF_INET) {
17446596Skp158701 		ipha = (ipha_t *)mp->b_rptr;
17456596Skp158701 		ipha->ipha_hdr_checksum = 0;
17466596Skp158701 		ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
17471676Sjpk 	}
17481676Sjpk 
17491676Sjpk keep_label:
17501676Sjpk 	TPC_RELE(dst_rhtp);
17512535Ssangeeta 	if (need_tpc_rele && gw_rhtp != NULL)
17522535Ssangeeta 		TPC_RELE(gw_rhtp);
17531676Sjpk 	return (mp);
17541676Sjpk }
17551676Sjpk 
17561676Sjpk /*
17574564Swy83408  * Name:	tsol_pmtu_adjust()
17584564Swy83408  *
17594564Swy83408  * Returns the adjusted mtu after removing security option.
17604564Swy83408  * Removes/subtracts the option if the packet's cred indicates an unlabeled
17614564Swy83408  * sender or if pkt_diff indicates this system enlarged the packet.
17624564Swy83408  */
17634564Swy83408 uint32_t
17644564Swy83408 tsol_pmtu_adjust(mblk_t *mp, uint32_t mtu, int pkt_diff, int af)
17654564Swy83408 {
17664564Swy83408 	int		label_adj = 0;
17674564Swy83408 	uint32_t	min_mtu = IP_MIN_MTU;
17684564Swy83408 	tsol_tpc_t	*src_rhtp;
17694564Swy83408 	void		*src;
17704564Swy83408 
17714564Swy83408 	/*
17724564Swy83408 	 * Note: label_adj is non-positive, indicating the number of
17734564Swy83408 	 * bytes removed by removing the security option from the
17744564Swy83408 	 * header.
17754564Swy83408 	 */
17764564Swy83408 	if (af == AF_INET6) {
17774564Swy83408 		ip6_t	*ip6h;
17784564Swy83408 
17794564Swy83408 		min_mtu = IPV6_MIN_MTU;
17804564Swy83408 		ip6h = (ip6_t *)mp->b_rptr;
17814564Swy83408 		src = &ip6h->ip6_src;
17824564Swy83408 		if ((src_rhtp = find_tpc(src, IPV6_VERSION, B_FALSE)) == NULL)
17834564Swy83408 			return (mtu);
17844564Swy83408 		if (pkt_diff > 0 || src_rhtp->tpc_tp.host_type == UNLABELED) {
17854564Swy83408 			label_adj = tsol_remove_secopt_v6(
17864564Swy83408 			    (ip6_t *)mp->b_rptr, MBLKL(mp));
17874564Swy83408 		}
17884564Swy83408 	} else {
17894564Swy83408 		ipha_t    *ipha;
17904564Swy83408 
17914564Swy83408 		ASSERT(af == AF_INET);
17924564Swy83408 		ipha = (ipha_t *)mp->b_rptr;
17934564Swy83408 		src = &ipha->ipha_src;
17944564Swy83408 		if ((src_rhtp = find_tpc(src, IPV4_VERSION, B_FALSE)) == NULL)
17954564Swy83408 			return (mtu);
17964564Swy83408 		if (pkt_diff > 0 || src_rhtp->tpc_tp.host_type == UNLABELED)
17974564Swy83408 			label_adj = tsol_remove_secopt(
17984564Swy83408 			    (ipha_t *)mp->b_rptr, MBLKL(mp));
17994564Swy83408 	}
18004564Swy83408 	/*
18014564Swy83408 	 * Make pkt_diff non-negative and the larger of the bytes
18024564Swy83408 	 * previously added (if any) or just removed, since label
18034564Swy83408 	 * addition + subtraction may not be completely idempotent.
18044564Swy83408 	 */
18054564Swy83408 	if (pkt_diff < -label_adj)
18064564Swy83408 		pkt_diff = -label_adj;
18074564Swy83408 	if (pkt_diff > 0 && pkt_diff < mtu)
18084564Swy83408 		mtu -= pkt_diff;
18094564Swy83408 
18104564Swy83408 	TPC_RELE(src_rhtp);
18114564Swy83408 	return (MAX(mtu, min_mtu));
18124564Swy83408 }
18134564Swy83408 
18144564Swy83408 /*
18151676Sjpk  * Name:	tsol_rtsa_init()
18161676Sjpk  *
18171676Sjpk  * Normal:	Sanity checks on the route security attributes provided by
18181676Sjpk  *		user.  Convert it into a route security parameter list to
18191676Sjpk  *		be returned to caller.
18201676Sjpk  *
18211676Sjpk  * Output:	EINVAL if bad security attributes in the routing message
18221676Sjpk  *		ENOMEM if unable to allocate data structures
18231676Sjpk  *		0 otherwise.
18241676Sjpk  *
18251676Sjpk  * Note:	On input, cp must point to the end of any addresses in
18261676Sjpk  *		the rt_msghdr_t structure.
18271676Sjpk  */
18281676Sjpk int
18291676Sjpk tsol_rtsa_init(rt_msghdr_t *rtm, tsol_rtsecattr_t *sp, caddr_t cp)
18301676Sjpk {
18311676Sjpk 	uint_t	sacnt;
18321676Sjpk 	int	err;
18331676Sjpk 	caddr_t	lim;
18341676Sjpk 	tsol_rtsecattr_t *tp;
18351676Sjpk 
18361676Sjpk 	ASSERT((cp >= (caddr_t)&rtm[1]) && sp != NULL);
18371676Sjpk 
18381676Sjpk 	/*
18391676Sjpk 	 * In theory, we could accept as many security attributes configured
18401676Sjpk 	 * per route destination.  However, the current design is limited
18411676Sjpk 	 * such that at most only one set security attributes is allowed to
18421676Sjpk 	 * be associated with a prefix IRE.  We therefore assert for now.
18431676Sjpk 	 */
18441676Sjpk 	/* LINTED */
18451676Sjpk 	ASSERT(TSOL_RTSA_REQUEST_MAX == 1);
18461676Sjpk 
18471676Sjpk 	sp->rtsa_cnt = 0;
18481676Sjpk 	lim = (caddr_t)rtm + rtm->rtm_msglen;
18491676Sjpk 	ASSERT(cp <= lim);
18501676Sjpk 
18511676Sjpk 	if ((lim - cp) < sizeof (rtm_ext_t) ||
18521676Sjpk 	    ((rtm_ext_t *)cp)->rtmex_type != RTMEX_GATEWAY_SECATTR)
18531676Sjpk 		return (0);
18541676Sjpk 
18551676Sjpk 	if (((rtm_ext_t *)cp)->rtmex_len < sizeof (tsol_rtsecattr_t))
18561676Sjpk 		return (EINVAL);
18571676Sjpk 
18581676Sjpk 	cp += sizeof (rtm_ext_t);
18591676Sjpk 
18601676Sjpk 	if ((lim - cp) < sizeof (*tp) ||
18611676Sjpk 	    (tp = (tsol_rtsecattr_t *)cp, (sacnt = tp->rtsa_cnt) == 0) ||
18621676Sjpk 	    (lim - cp) < TSOL_RTSECATTR_SIZE(sacnt))
18631676Sjpk 		return (EINVAL);
18641676Sjpk 
18651676Sjpk 	/*
18661676Sjpk 	 * Trying to add route security attributes when system
18671676Sjpk 	 * labeling service is not available, or when user supllies
18681676Sjpk 	 * more than the maximum number of security attributes
18691676Sjpk 	 * allowed per request.
18701676Sjpk 	 */
18711676Sjpk 	if ((sacnt > 0 && !is_system_labeled()) ||
18721676Sjpk 	    sacnt > TSOL_RTSA_REQUEST_MAX)
18731676Sjpk 		return (EINVAL);
18741676Sjpk 
18751676Sjpk 	/* Ensure valid credentials */
18761676Sjpk 	if ((err = rtsa_validate(&((tsol_rtsecattr_t *)cp)->
18771676Sjpk 	    rtsa_attr[0])) != 0) {
18781676Sjpk 		cp += sizeof (*sp);
18791676Sjpk 		return (err);
18801676Sjpk 	}
18811676Sjpk 
18821676Sjpk 	bcopy(cp, sp, sizeof (*sp));
18831676Sjpk 	cp += sizeof (*sp);
18841676Sjpk 	return (0);
18851676Sjpk }
18861676Sjpk 
18871676Sjpk int
18881676Sjpk tsol_ire_init_gwattr(ire_t *ire, uchar_t ipversion, tsol_gc_t *gc,
18891676Sjpk     tsol_gcgrp_t *gcgrp)
18901676Sjpk {
18911676Sjpk 	tsol_ire_gw_secattr_t *attrp;
18921676Sjpk 	boolean_t exists = B_FALSE;
18931676Sjpk 	in_addr_t ga_addr4;
18941676Sjpk 	void *paddr = NULL;
18951676Sjpk 
18961676Sjpk 	ASSERT(ire != NULL);
18971676Sjpk 
18981676Sjpk 	/*
18991676Sjpk 	 * The only time that attrp can be NULL is when this routine is
19001676Sjpk 	 * called for the first time during the creation/initialization
19011676Sjpk 	 * of the corresponding IRE.  It will only get cleared when the
19021676Sjpk 	 * IRE is deleted.
19031676Sjpk 	 */
19041676Sjpk 	if ((attrp = ire->ire_gw_secattr) == NULL) {
19051676Sjpk 		attrp = ire_gw_secattr_alloc(KM_NOSLEEP);
19061676Sjpk 		if (attrp == NULL)
19071676Sjpk 			return (ENOMEM);
19081676Sjpk 		ire->ire_gw_secattr = attrp;
19091676Sjpk 	} else {
19101676Sjpk 		exists = B_TRUE;
19111676Sjpk 		mutex_enter(&attrp->igsa_lock);
19121676Sjpk 
19131676Sjpk 		if (attrp->igsa_rhc != NULL) {
19141676Sjpk 			TNRHC_RELE(attrp->igsa_rhc);
19151676Sjpk 			attrp->igsa_rhc = NULL;
19161676Sjpk 		}
19171676Sjpk 
19181676Sjpk 		if (attrp->igsa_gc != NULL)
19191676Sjpk 			GC_REFRELE(attrp->igsa_gc);
19201676Sjpk 		if (attrp->igsa_gcgrp != NULL)
19211676Sjpk 			GCGRP_REFRELE(attrp->igsa_gcgrp);
19221676Sjpk 	}
19231676Sjpk 	ASSERT(!exists || MUTEX_HELD(&attrp->igsa_lock));
19241676Sjpk 
19251676Sjpk 	/*
19261676Sjpk 	 * References already held by caller and we keep them;
19271676Sjpk 	 * note that both gc and gcgrp may be set to NULL to
19281676Sjpk 	 * clear out igsa_gc and igsa_gcgrp, respectively.
19291676Sjpk 	 */
19301676Sjpk 	attrp->igsa_gc = gc;
19311676Sjpk 	attrp->igsa_gcgrp = gcgrp;
19321676Sjpk 
19331676Sjpk 	if (gcgrp == NULL && gc != NULL) {
19341676Sjpk 		gcgrp = gc->gc_grp;
19351676Sjpk 		ASSERT(gcgrp != NULL);
19361676Sjpk 	}
19371676Sjpk 
19381676Sjpk 	/*
19391676Sjpk 	 * Intialize the template for gateway; we use the gateway's
19401676Sjpk 	 * address found in either the passed in gateway credential
19411676Sjpk 	 * or group pointer, or the ire_gateway_addr{_v6} field.
19421676Sjpk 	 */
19431676Sjpk 	if (gcgrp != NULL) {
19441676Sjpk 		tsol_gcgrp_addr_t *ga = &gcgrp->gcgrp_addr;
19451676Sjpk 
19461676Sjpk 		/*
19471676Sjpk 		 * Caller is holding a reference, and that we don't
19481676Sjpk 		 * need to hold any lock to access the address.
19491676Sjpk 		 */
19501676Sjpk 		if (ipversion == IPV4_VERSION) {
19511676Sjpk 			ASSERT(ga->ga_af == AF_INET);
19521676Sjpk 			IN6_V4MAPPED_TO_IPADDR(&ga->ga_addr, ga_addr4);
19531676Sjpk 			paddr = &ga_addr4;
19541676Sjpk 		} else {
19551676Sjpk 			ASSERT(ga->ga_af == AF_INET6);
19561676Sjpk 			paddr = &ga->ga_addr;
19571676Sjpk 		}
19581676Sjpk 	} else if (ipversion == IPV6_VERSION &&
19591676Sjpk 	    !IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6)) {
19601676Sjpk 		paddr = &ire->ire_gateway_addr_v6;
19611676Sjpk 	} else if (ipversion == IPV4_VERSION &&
19621676Sjpk 	    ire->ire_gateway_addr != INADDR_ANY) {
19631676Sjpk 		paddr = &ire->ire_gateway_addr;
19641676Sjpk 	}
19651676Sjpk 
19661676Sjpk 	/*
19671676Sjpk 	 * Lookup the gateway template; note that we could get an internal
19681676Sjpk 	 * template here, which we cache anyway.  During IRE matching, we'll
19691676Sjpk 	 * try to update this gateway template cache and hopefully get a
19701676Sjpk 	 * real one.
19711676Sjpk 	 */
19721676Sjpk 	if (paddr != NULL) {
19733292Skp158701 		attrp->igsa_rhc = find_rhc(paddr, ipversion, B_FALSE);
19741676Sjpk 	}
19751676Sjpk 
19761676Sjpk 	if (exists)
19771676Sjpk 		mutex_exit(&attrp->igsa_lock);
19781676Sjpk 
19791676Sjpk 	return (0);
19801676Sjpk }
19811676Sjpk 
19821676Sjpk /*
19831676Sjpk  * This function figures the type of MLP that we'll be using based on the
19841676Sjpk  * address that the user is binding and the zone.  If the address is
19851676Sjpk  * unspecified, then we're looking at both private and shared.  If it's one
19861676Sjpk  * of the zone's private addresses, then it's private only.  If it's one
198710493SJarrett.Lu@Sun.COM  * of the global addresses, then it's shared only. Multicast addresses are
198810493SJarrett.Lu@Sun.COM  * treated same as unspecified address.
19891676Sjpk  *
19901676Sjpk  * If we can't figure out what it is, then return mlptSingle.  That's actually
19911676Sjpk  * an error case.
19923448Sdh155122  *
19933448Sdh155122  * The callers are assume to pass in zone->zone_id and not the zoneid that
19943448Sdh155122  * is stored in a conn_t (since the latter will be GLOBAL_ZONEID in an
19953448Sdh155122  * exclusive stack zone).
19961676Sjpk  */
19971676Sjpk mlp_type_t
19983448Sdh155122 tsol_mlp_addr_type(zoneid_t zoneid, uchar_t version, const void *addr,
19993448Sdh155122     ip_stack_t *ipst)
20001676Sjpk {
20011676Sjpk 	in_addr_t in4;
20021676Sjpk 	ire_t *ire;
20031676Sjpk 	ipif_t *ipif;
20041676Sjpk 	zoneid_t addrzone;
20053448Sdh155122 	zoneid_t ip_zoneid;
20061676Sjpk 
20071676Sjpk 	ASSERT(addr != NULL);
20081676Sjpk 
20093448Sdh155122 	/*
20103448Sdh155122 	 * For exclusive stacks we set the zoneid to zero
20113448Sdh155122 	 * to operate as if in the global zone for IRE and conn_t comparisons.
20123448Sdh155122 	 */
20133448Sdh155122 	if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
20143448Sdh155122 		ip_zoneid = GLOBAL_ZONEID;
20153448Sdh155122 	else
20163448Sdh155122 		ip_zoneid = zoneid;
20173448Sdh155122 
20181676Sjpk 	if (version == IPV6_VERSION &&
20191676Sjpk 	    IN6_IS_ADDR_V4MAPPED((const in6_addr_t *)addr)) {
20201676Sjpk 		IN6_V4MAPPED_TO_IPADDR((const in6_addr_t *)addr, in4);
20211676Sjpk 		addr = &in4;
20221676Sjpk 		version = IPV4_VERSION;
20231676Sjpk 	}
20241676Sjpk 
20251676Sjpk 	if (version == IPV4_VERSION) {
20261676Sjpk 		in4 = *(const in_addr_t *)addr;
202710493SJarrett.Lu@Sun.COM 		if ((in4 == INADDR_ANY) || CLASSD(in4)) {
20281676Sjpk 			return (mlptBoth);
20293448Sdh155122 		}
20303448Sdh155122 		ire = ire_cache_lookup(in4, ip_zoneid, NULL, ipst);
20311676Sjpk 	} else {
203210493SJarrett.Lu@Sun.COM 		if (IN6_IS_ADDR_UNSPECIFIED((const in6_addr_t *)addr) ||
203310493SJarrett.Lu@Sun.COM 		    IN6_IS_ADDR_MULTICAST((const in6_addr_t *)addr)) {
20341676Sjpk 			return (mlptBoth);
20353448Sdh155122 		}
20363448Sdh155122 		ire = ire_cache_lookup_v6(addr, ip_zoneid, NULL, ipst);
20371676Sjpk 	}
20381676Sjpk 	/*
20391676Sjpk 	 * If we can't find the IRE, then we have to behave exactly like
20401676Sjpk 	 * ip_bind_laddr{,_v6}.  That means looking up the IPIF so that users
20411676Sjpk 	 * can bind to addresses on "down" interfaces.
20421676Sjpk 	 *
20431676Sjpk 	 * If we can't find that either, then the bind is going to fail, so
20441676Sjpk 	 * just give up.  Note that there's a miniscule chance that the address
20451676Sjpk 	 * is in transition, but we don't bother handling that.
20461676Sjpk 	 */
20471676Sjpk 	if (ire == NULL) {
20481676Sjpk 		if (version == IPV4_VERSION)
20491676Sjpk 			ipif = ipif_lookup_addr(*(const in_addr_t *)addr, NULL,
20503448Sdh155122 			    ip_zoneid, NULL, NULL, NULL, NULL, ipst);
20511676Sjpk 		else
20521676Sjpk 			ipif = ipif_lookup_addr_v6((const in6_addr_t *)addr,
20533448Sdh155122 			    NULL, ip_zoneid, NULL, NULL, NULL, NULL, ipst);
20543448Sdh155122 		if (ipif == NULL) {
20551676Sjpk 			return (mlptSingle);
20563448Sdh155122 		}
20571676Sjpk 		addrzone = ipif->ipif_zoneid;
20581676Sjpk 		ipif_refrele(ipif);
20591676Sjpk 	} else {
20601676Sjpk 		addrzone = ire->ire_zoneid;
20611676Sjpk 		ire_refrele(ire);
20621676Sjpk 	}
20631676Sjpk 	return (addrzone == ALL_ZONES ? mlptShared : mlptPrivate);
20641676Sjpk }
20651676Sjpk 
20661676Sjpk /*
20671676Sjpk  * Since we are configuring local interfaces, and we know trusted
20681676Sjpk  * extension CDE requires local interfaces to be cipso host type in
20691676Sjpk  * order to function correctly, we'll associate a cipso template
20701676Sjpk  * to each local interface and let the interface come up.  Configuring
20711676Sjpk  * a local interface to be "unlabeled" host type is a configuration error.
20721676Sjpk  * We'll override that error and make the interface host type to be cipso
20731676Sjpk  * here.
20741676Sjpk  *
20751676Sjpk  * The code is optimized for the usual "success" case and unwinds things on
20761676Sjpk  * error.  We don't want to go to the trouble and expense of formatting the
20771676Sjpk  * interface name for the usual case where everything is configured correctly.
20781676Sjpk  */
20791676Sjpk boolean_t
20801676Sjpk tsol_check_interface_address(const ipif_t *ipif)
20811676Sjpk {
20821676Sjpk 	tsol_tpc_t *tp;
20831676Sjpk 	char addrbuf[INET6_ADDRSTRLEN];
20841676Sjpk 	int af;
20851676Sjpk 	const void *addr;
20861676Sjpk 	zone_t *zone;
20871676Sjpk 	ts_label_t *plabel;
20881676Sjpk 	const bslabel_t *label;
20891676Sjpk 	char ifbuf[LIFNAMSIZ + 10];
20901676Sjpk 	const char *ifname;
20911676Sjpk 	boolean_t retval;
20921676Sjpk 	tsol_rhent_t rhent;
20933448Sdh155122 	netstack_t *ns = ipif->ipif_ill->ill_ipst->ips_netstack;
20941676Sjpk 
20951676Sjpk 	if (IN6_IS_ADDR_V4MAPPED(&ipif->ipif_v6lcl_addr)) {
20961676Sjpk 		af = AF_INET;
20971676Sjpk 		addr = &V4_PART_OF_V6(ipif->ipif_v6lcl_addr);
20981676Sjpk 	} else {
20991676Sjpk 		af = AF_INET6;
21001676Sjpk 		addr = &ipif->ipif_v6lcl_addr;
21011676Sjpk 	}
21021676Sjpk 
21031676Sjpk 	tp = find_tpc(&ipif->ipif_v6lcl_addr, IPV6_VERSION, B_FALSE);
21043448Sdh155122 
21053448Sdh155122 	/* assumes that ALL_ZONES implies that there is no exclusive stack */
21063448Sdh155122 	if (ipif->ipif_zoneid == ALL_ZONES) {
21073448Sdh155122 		zone = NULL;
21083448Sdh155122 	} else if (ns->netstack_stackid == GLOBAL_NETSTACKID) {
21093448Sdh155122 		/* Shared stack case */
21103448Sdh155122 		zone = zone_find_by_id(ipif->ipif_zoneid);
21113448Sdh155122 	} else {
21123448Sdh155122 		/* Exclusive stack case */
21133448Sdh155122 		zone = zone_find_by_id(crgetzoneid(ipif->ipif_ill->ill_credp));
21143448Sdh155122 	}
21151676Sjpk 	if (zone != NULL) {
21161676Sjpk 		plabel = zone->zone_slabel;
21171676Sjpk 		ASSERT(plabel != NULL);
21181676Sjpk 		label = label2bslabel(plabel);
21191676Sjpk 	}
21201676Sjpk 
21211676Sjpk 	/*
21221676Sjpk 	 * If it's CIPSO and an all-zones address, then we're done.
21231676Sjpk 	 * If it's a CIPSO zone specific address, the zone's label
21241676Sjpk 	 * must be in the range or set specified in the template.
21251676Sjpk 	 * When the remote host entry is missing or the template
21261676Sjpk 	 * type is incorrect for this interface, we create a
21271676Sjpk 	 * CIPSO host entry in kernel and allow the interface to be
21281676Sjpk 	 * brought up as CIPSO type.
21291676Sjpk 	 */
21301676Sjpk 	if (tp != NULL && (
21311676Sjpk 	    /* The all-zones case */
21321676Sjpk 	    (tp->tpc_tp.host_type == SUN_CIPSO &&
21331676Sjpk 	    tp->tpc_tp.tp_doi == default_doi &&
21341676Sjpk 	    ipif->ipif_zoneid == ALL_ZONES) ||
21351676Sjpk 	    /* The local-zone case */
21361676Sjpk 	    (zone != NULL && plabel->tsl_doi == tp->tpc_tp.tp_doi &&
21371676Sjpk 	    ((tp->tpc_tp.host_type == SUN_CIPSO &&
21381676Sjpk 	    (_blinrange(label, &tp->tpc_tp.tp_sl_range_cipso) ||
21391676Sjpk 	    blinlset(label, tp->tpc_tp.tp_sl_set_cipso))))))) {
21401676Sjpk 		if (zone != NULL)
21411676Sjpk 			zone_rele(zone);
21421676Sjpk 		TPC_RELE(tp);
21431676Sjpk 		return (B_TRUE);
21441676Sjpk 	}
21451676Sjpk 
21461676Sjpk 	ifname = ipif->ipif_ill->ill_name;
21471676Sjpk 	if (ipif->ipif_id != 0) {
21481676Sjpk 		(void) snprintf(ifbuf, sizeof (ifbuf), "%s:%u", ifname,
21491676Sjpk 		    ipif->ipif_id);
21501676Sjpk 		ifname = ifbuf;
21511676Sjpk 	}
21521676Sjpk 	(void) inet_ntop(af, addr, addrbuf, sizeof (addrbuf));
21531676Sjpk 
21541676Sjpk 	if (tp == NULL) {
21551676Sjpk 		cmn_err(CE_NOTE, "template entry for %s missing. Default to "
21561676Sjpk 		    "CIPSO type for %s", ifname, addrbuf);
21571676Sjpk 		retval = B_TRUE;
21581676Sjpk 	} else if (tp->tpc_tp.host_type == UNLABELED) {
21591676Sjpk 		cmn_err(CE_NOTE, "template type for %s incorrectly configured. "
21601676Sjpk 		    "Change to CIPSO type for %s", ifname, addrbuf);
21611676Sjpk 		retval = B_TRUE;
21621676Sjpk 	} else if (ipif->ipif_zoneid == ALL_ZONES) {
21631676Sjpk 		if (tp->tpc_tp.host_type != SUN_CIPSO) {
21641676Sjpk 			cmn_err(CE_NOTE, "%s failed: %s isn't set to CIPSO for "
21651676Sjpk 			    "all-zones. Converted to CIPSO.", ifname, addrbuf);
21661676Sjpk 			retval = B_TRUE;
21671676Sjpk 		} else {
21681676Sjpk 			cmn_err(CE_NOTE, "%s failed: %s has wrong DOI %d "
21691676Sjpk 			    "instead of %d", ifname, addrbuf,
21701676Sjpk 			    tp->tpc_tp.tp_doi, default_doi);
21711676Sjpk 			retval = B_FALSE;
21721676Sjpk 		}
21731676Sjpk 	} else if (zone == NULL) {
21741676Sjpk 		cmn_err(CE_NOTE, "%s failed: zoneid %d unknown",
21751676Sjpk 		    ifname, ipif->ipif_zoneid);
21761676Sjpk 		retval = B_FALSE;
21771676Sjpk 	} else if (plabel->tsl_doi != tp->tpc_tp.tp_doi) {
21781676Sjpk 		cmn_err(CE_NOTE, "%s failed: zone %s has DOI %d but %s has "
21791676Sjpk 		    "DOI %d", ifname, zone->zone_name, plabel->tsl_doi,
21801676Sjpk 		    addrbuf, tp->tpc_tp.tp_doi);
21811676Sjpk 		retval = B_FALSE;
21821676Sjpk 	} else {
21831676Sjpk 		cmn_err(CE_NOTE, "%s failed: zone %s label incompatible with "
21841676Sjpk 		    "%s", ifname, zone->zone_name, addrbuf);
21851676Sjpk 		tsol_print_label(label, "zone label");
21861676Sjpk 		retval = B_FALSE;
21871676Sjpk 	}
21881676Sjpk 
21891676Sjpk 	if (zone != NULL)
21901676Sjpk 		zone_rele(zone);
21911676Sjpk 	if (tp != NULL)
21921676Sjpk 		TPC_RELE(tp);
21931676Sjpk 	if (retval) {
21941676Sjpk 		/*
21951676Sjpk 		 * we've corrected a config error and let the interface
21961676Sjpk 		 * come up as cipso. Need to insert an rhent.
21971676Sjpk 		 */
21981676Sjpk 		if ((rhent.rh_address.ta_family = af) == AF_INET) {
21991676Sjpk 			rhent.rh_prefix = 32;
22001676Sjpk 			rhent.rh_address.ta_addr_v4 = *(struct in_addr *)addr;
22011676Sjpk 		} else {
22021676Sjpk 			rhent.rh_prefix = 128;
22031676Sjpk 			rhent.rh_address.ta_addr_v6 = *(in6_addr_t *)addr;
22041676Sjpk 		}
22051676Sjpk 		(void) strcpy(rhent.rh_template, "cipso");
22061676Sjpk 		if (tnrh_load(&rhent) != 0) {
22071676Sjpk 			cmn_err(CE_NOTE, "%s failed: Cannot insert CIPSO "
22081676Sjpk 			    "template for local addr %s", ifname, addrbuf);
22091676Sjpk 			retval = B_FALSE;
22101676Sjpk 		}
22111676Sjpk 	}
22121676Sjpk 	return (retval);
22131676Sjpk }
2214