xref: /onnv-gate/usr/src/uts/common/inet/ip/tnet.c (revision 1676:37f4a3e2bd99)
1*1676Sjpk /*
2*1676Sjpk  * CDDL HEADER START
3*1676Sjpk  *
4*1676Sjpk  * The contents of this file are subject to the terms of the
5*1676Sjpk  * Common Development and Distribution License (the "License").
6*1676Sjpk  * You may not use this file except in compliance with the License.
7*1676Sjpk  *
8*1676Sjpk  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*1676Sjpk  * or http://www.opensolaris.org/os/licensing.
10*1676Sjpk  * See the License for the specific language governing permissions
11*1676Sjpk  * and limitations under the License.
12*1676Sjpk  *
13*1676Sjpk  * When distributing Covered Code, include this CDDL HEADER in each
14*1676Sjpk  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*1676Sjpk  * If applicable, add the following below this CDDL HEADER, with the
16*1676Sjpk  * fields enclosed by brackets "[]" replaced with your own identifying
17*1676Sjpk  * information: Portions Copyright [yyyy] [name of copyright owner]
18*1676Sjpk  *
19*1676Sjpk  * CDDL HEADER END
20*1676Sjpk  */
21*1676Sjpk /*
22*1676Sjpk  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23*1676Sjpk  * Use is subject to license terms.
24*1676Sjpk  */
25*1676Sjpk 
26*1676Sjpk #pragma ident	"%Z%%M%	%I%	%E% SMI"
27*1676Sjpk 
28*1676Sjpk #include <sys/types.h>
29*1676Sjpk #include <sys/stream.h>
30*1676Sjpk #include <sys/strsubr.h>
31*1676Sjpk #include <sys/stropts.h>
32*1676Sjpk #include <sys/sunddi.h>
33*1676Sjpk #include <sys/cred.h>
34*1676Sjpk #include <sys/debug.h>
35*1676Sjpk #include <sys/kmem.h>
36*1676Sjpk #include <sys/errno.h>
37*1676Sjpk #include <sys/disp.h>
38*1676Sjpk #include <netinet/in.h>
39*1676Sjpk #include <netinet/in_systm.h>
40*1676Sjpk #include <netinet/ip.h>
41*1676Sjpk #include <netinet/ip_icmp.h>
42*1676Sjpk #include <netinet/tcp.h>
43*1676Sjpk #include <inet/common.h>
44*1676Sjpk #include <inet/ipclassifier.h>
45*1676Sjpk #include <inet/ip.h>
46*1676Sjpk #include <inet/mib2.h>
47*1676Sjpk #include <inet/nd.h>
48*1676Sjpk #include <inet/tcp.h>
49*1676Sjpk #include <inet/ip_rts.h>
50*1676Sjpk #include <inet/ip_ire.h>
51*1676Sjpk #include <inet/ip_if.h>
52*1676Sjpk #include <sys/modhash.h>
53*1676Sjpk 
54*1676Sjpk #include <sys/tsol/label.h>
55*1676Sjpk #include <sys/tsol/label_macro.h>
56*1676Sjpk #include <sys/tsol/tnet.h>
57*1676Sjpk #include <sys/tsol/tndb.h>
58*1676Sjpk #include <sys/strsun.h>
59*1676Sjpk 
60*1676Sjpk /* tunable for strict error-reply behavior (TCP RST and ICMP Unreachable) */
61*1676Sjpk int tsol_strict_error;
62*1676Sjpk 
63*1676Sjpk /*
64*1676Sjpk  * Some notes on the Trusted Solaris IRE gateway security attributes:
65*1676Sjpk  *
66*1676Sjpk  * When running in Trusted mode, the routing subsystem determines whether or
67*1676Sjpk  * not a packet can be delivered to an off-link host (not directly reachable
68*1676Sjpk  * through an interface) based on the accreditation checks of the packet's
69*1676Sjpk  * security attributes against those associated with the next-hop gateway.
70*1676Sjpk  *
71*1676Sjpk  * The next-hop gateway's security attributes can be derived from two sources
72*1676Sjpk  * (in order of preference): route-related and the host database.  A Trusted
73*1676Sjpk  * system must be configured with at least the host database containing an
74*1676Sjpk  * entry for the next-hop gateway, or otherwise no accreditation checks can
75*1676Sjpk  * be performed, which may result in the inability to send packets to any
76*1676Sjpk  * off-link destination host.
77*1676Sjpk  *
78*1676Sjpk  * The major differences between the two sources are the number and type of
79*1676Sjpk  * security attributes used for accreditation checks.  A host database entry
80*1676Sjpk  * can contain at most one set of security attributes, specific only to the
81*1676Sjpk  * next-hop gateway.  In contrast, route-related security attributes are made
82*1676Sjpk  * up of a collection of security attributes for the distant networks, and
83*1676Sjpk  * are grouped together per next-hop gateway used to reach those networks.
84*1676Sjpk  * This is the preferred method, and the routing subsystem will fall back to
85*1676Sjpk  * the host database entry only if there are no route-related attributes
86*1676Sjpk  * associated with the next-hop gateway.
87*1676Sjpk  *
88*1676Sjpk  * In Trusted mode, all of the IRE entries (except LOCAL/LOOPBACK/BROADCAST/
89*1676Sjpk  * INTERFACE type) are initialized to contain a placeholder to store this
90*1676Sjpk  * information.  The ire_gw_secattr structure gets allocated, initialized
91*1676Sjpk  * and associated with the IRE during the time of the IRE creation.  The
92*1676Sjpk  * initialization process also includes resolving the host database entry
93*1676Sjpk  * of the next-hop gateway for fallback purposes.  It does not include any
94*1676Sjpk  * route-related attribute setup, as that process comes separately as part
95*1676Sjpk  * of the route requests (add/change) made to the routing subsystem.
96*1676Sjpk  *
97*1676Sjpk  * The underlying logic which involves associating IREs with the gateway
98*1676Sjpk  * security attributes is represented by the following data structures:
99*1676Sjpk  *
100*1676Sjpk  * tsol_gcdb_t, or "gcdb"
101*1676Sjpk  *
102*1676Sjpk  *	- This is a system-wide collection of records containing the
103*1676Sjpk  *	  currently used route-related security attributes, which are fed
104*1676Sjpk  *	  through the routing socket interface, e.g. "route add/change".
105*1676Sjpk  *
106*1676Sjpk  * tsol_gc_t, or "gc"
107*1676Sjpk  *
108*1676Sjpk  *	- This is the gateway credential structure, and it provides for the
109*1676Sjpk  *	  only mechanism to access the contents of gcdb.  More than one gc
110*1676Sjpk  *	  entry may refer to the same gcdb record.  gc's in the system are
111*1676Sjpk  *	  grouped according to the next-hop gateway address.
112*1676Sjpk  *
113*1676Sjpk  * tsol_gcgrp_t, or "gcgrp"
114*1676Sjpk  *
115*1676Sjpk  *	- Group of gateway credentials, and is unique per next-hop gateway
116*1676Sjpk  *	  address.  When the group is not empty, i.e. when gcgrp_count is
117*1676Sjpk  *	  greater than zero, it contains one or more gc's, each pointing to
118*1676Sjpk  *	  a gcdb record which indicates the gateway security attributes
119*1676Sjpk  *	  associated with the next-hop gateway.
120*1676Sjpk  *
121*1676Sjpk  * The fields of the tsol_ire_gw_secattr_t used from within the IRE are:
122*1676Sjpk  *
123*1676Sjpk  * igsa_lock
124*1676Sjpk  *
125*1676Sjpk  *	- Lock that protects all fields within tsol_ire_gw_secattr_t.
126*1676Sjpk  *
127*1676Sjpk  * igsa_rhc
128*1676Sjpk  *
129*1676Sjpk  *	- Remote host cache database entry of next-hop gateway.  This is
130*1676Sjpk  *	  used in the case when there are no route-related attributes
131*1676Sjpk  *	  configured for the IRE.
132*1676Sjpk  *
133*1676Sjpk  * igsa_gc
134*1676Sjpk  *
135*1676Sjpk  *	- A set of route-related attributes that only get set for prefix
136*1676Sjpk  *	  IREs.  If this is non-NULL, the prefix IRE has been associated
137*1676Sjpk  *	  with a set of gateway security attributes by way of route add/
138*1676Sjpk  *	  change functionality.  This field stays NULL for IRE_CACHEs.
139*1676Sjpk  *
140*1676Sjpk  * igsa_gcgrp
141*1676Sjpk  *
142*1676Sjpk  *	- Group of gc's which only gets set for IRE_CACHEs.  Each of the gc
143*1676Sjpk  *	  points to a gcdb record that contains the security attributes
144*1676Sjpk  *	  used to perform the credential checks of the packet which uses
145*1676Sjpk  *	  the IRE.  If the group is not empty, the list of gc's can be
146*1676Sjpk  *	  traversed starting at gcgrp_head.  This field stays NULL for
147*1676Sjpk  *	  prefix IREs.
148*1676Sjpk  */
149*1676Sjpk 
150*1676Sjpk static kmem_cache_t *ire_gw_secattr_cache;
151*1676Sjpk 
152*1676Sjpk #define	GCDB_HASH_SIZE	101
153*1676Sjpk #define	GCGRP_HASH_SIZE	101
154*1676Sjpk 
/*
 * Drop one reference on the gcdb record `p'.  gcdb_lock is held across the
 * update; when the count reaches zero the record is passed to
 * gcdb_inactive() while the lock is still held.
 *
 * `p' is expanded more than once, so it must be free of side effects.  The
 * body is wrapped in do { } while (0) so that "GCDB_REFRELE(x);" forms a
 * single statement and stays correct inside an unbraced if/else.
 */
#define	GCDB_REFRELE(p) do {			\
	mutex_enter(&gcdb_lock);		\
	ASSERT((p)->gcdb_refcnt > 0);		\
	if (--((p)->gcdb_refcnt) == 0)		\
		gcdb_inactive(p);		\
	ASSERT(MUTEX_HELD(&gcdb_lock));		\
	mutex_exit(&gcdb_lock);			\
} while (0)
163*1676Sjpk 
164*1676Sjpk static int gcdb_hash_size = GCDB_HASH_SIZE;
165*1676Sjpk static int gcgrp_hash_size = GCGRP_HASH_SIZE;
166*1676Sjpk static mod_hash_t *gcdb_hash;
167*1676Sjpk static mod_hash_t *gcgrp4_hash;
168*1676Sjpk static mod_hash_t *gcgrp6_hash;
169*1676Sjpk 
170*1676Sjpk static kmutex_t gcdb_lock;
171*1676Sjpk kmutex_t gcgrp_lock;
172*1676Sjpk 
173*1676Sjpk static uint_t gcdb_hash_by_secattr(void *, mod_hash_key_t);
174*1676Sjpk static int gcdb_hash_cmp(mod_hash_key_t, mod_hash_key_t);
175*1676Sjpk static tsol_gcdb_t *gcdb_lookup(struct rtsa_s *, boolean_t);
176*1676Sjpk static void gcdb_inactive(tsol_gcdb_t *);
177*1676Sjpk 
178*1676Sjpk static uint_t gcgrp_hash_by_addr(void *, mod_hash_key_t);
179*1676Sjpk static int gcgrp_hash_cmp(mod_hash_key_t, mod_hash_key_t);
180*1676Sjpk 
181*1676Sjpk static int ire_gw_secattr_constructor(void *, void *, int);
182*1676Sjpk static void ire_gw_secattr_destructor(void *, void *);
183*1676Sjpk 
184*1676Sjpk void
185*1676Sjpk tnet_init(void)
186*1676Sjpk {
187*1676Sjpk 	ire_gw_secattr_cache = kmem_cache_create("ire_gw_secattr_cache",
188*1676Sjpk 	    sizeof (tsol_ire_gw_secattr_t), 64, ire_gw_secattr_constructor,
189*1676Sjpk 	    ire_gw_secattr_destructor, NULL, NULL, NULL, 0);
190*1676Sjpk 
191*1676Sjpk 	gcdb_hash = mod_hash_create_extended("gcdb_hash",
192*1676Sjpk 	    gcdb_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor,
193*1676Sjpk 	    gcdb_hash_by_secattr, NULL, gcdb_hash_cmp, KM_SLEEP);
194*1676Sjpk 
195*1676Sjpk 	gcgrp4_hash = mod_hash_create_extended("gcgrp4_hash",
196*1676Sjpk 	    gcgrp_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor,
197*1676Sjpk 	    gcgrp_hash_by_addr, NULL, gcgrp_hash_cmp, KM_SLEEP);
198*1676Sjpk 
199*1676Sjpk 	gcgrp6_hash = mod_hash_create_extended("gcgrp6_hash",
200*1676Sjpk 	    gcgrp_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor,
201*1676Sjpk 	    gcgrp_hash_by_addr, NULL, gcgrp_hash_cmp, KM_SLEEP);
202*1676Sjpk 
203*1676Sjpk 	mutex_init(&gcdb_lock, NULL, MUTEX_DEFAULT, NULL);
204*1676Sjpk 	mutex_init(&gcgrp_lock, NULL, MUTEX_DEFAULT, NULL);
205*1676Sjpk }
206*1676Sjpk 
207*1676Sjpk void
208*1676Sjpk tnet_fini(void)
209*1676Sjpk {
210*1676Sjpk 	kmem_cache_destroy(ire_gw_secattr_cache);
211*1676Sjpk 	mod_hash_destroy_hash(gcdb_hash);
212*1676Sjpk 	mod_hash_destroy_hash(gcgrp4_hash);
213*1676Sjpk 	mod_hash_destroy_hash(gcgrp6_hash);
214*1676Sjpk 	mutex_destroy(&gcdb_lock);
215*1676Sjpk 	mutex_destroy(&gcgrp_lock);
216*1676Sjpk }
217*1676Sjpk 
218*1676Sjpk /* ARGSUSED */
219*1676Sjpk static int
220*1676Sjpk ire_gw_secattr_constructor(void *buf, void *cdrarg, int kmflags)
221*1676Sjpk {
222*1676Sjpk 	tsol_ire_gw_secattr_t *attrp = buf;
223*1676Sjpk 
224*1676Sjpk 	mutex_init(&attrp->igsa_lock, NULL, MUTEX_DEFAULT, NULL);
225*1676Sjpk 
226*1676Sjpk 	attrp->igsa_rhc = NULL;
227*1676Sjpk 	attrp->igsa_gc = NULL;
228*1676Sjpk 	attrp->igsa_gcgrp = NULL;
229*1676Sjpk 
230*1676Sjpk 	return (0);
231*1676Sjpk }
232*1676Sjpk 
233*1676Sjpk /* ARGSUSED */
234*1676Sjpk static void
235*1676Sjpk ire_gw_secattr_destructor(void *buf, void *cdrarg)
236*1676Sjpk {
237*1676Sjpk 	tsol_ire_gw_secattr_t *attrp = (tsol_ire_gw_secattr_t *)buf;
238*1676Sjpk 
239*1676Sjpk 	mutex_destroy(&attrp->igsa_lock);
240*1676Sjpk }
241*1676Sjpk 
242*1676Sjpk tsol_ire_gw_secattr_t *
243*1676Sjpk ire_gw_secattr_alloc(int kmflags)
244*1676Sjpk {
245*1676Sjpk 	return (kmem_cache_alloc(ire_gw_secattr_cache, kmflags));
246*1676Sjpk }
247*1676Sjpk 
248*1676Sjpk void
249*1676Sjpk ire_gw_secattr_free(tsol_ire_gw_secattr_t *attrp)
250*1676Sjpk {
251*1676Sjpk 	ASSERT(MUTEX_NOT_HELD(&attrp->igsa_lock));
252*1676Sjpk 
253*1676Sjpk 	if (attrp->igsa_rhc != NULL) {
254*1676Sjpk 		TNRHC_RELE(attrp->igsa_rhc);
255*1676Sjpk 		attrp->igsa_rhc = NULL;
256*1676Sjpk 	}
257*1676Sjpk 
258*1676Sjpk 	if (attrp->igsa_gc != NULL) {
259*1676Sjpk 		GC_REFRELE(attrp->igsa_gc);
260*1676Sjpk 		attrp->igsa_gc = NULL;
261*1676Sjpk 	}
262*1676Sjpk 	if (attrp->igsa_gcgrp != NULL) {
263*1676Sjpk 		GCGRP_REFRELE(attrp->igsa_gcgrp);
264*1676Sjpk 		attrp->igsa_gcgrp = NULL;
265*1676Sjpk 	}
266*1676Sjpk 
267*1676Sjpk 	ASSERT(attrp->igsa_rhc == NULL);
268*1676Sjpk 	ASSERT(attrp->igsa_gc == NULL);
269*1676Sjpk 	ASSERT(attrp->igsa_gcgrp == NULL);
270*1676Sjpk 
271*1676Sjpk 	kmem_cache_free(ire_gw_secattr_cache, attrp);
272*1676Sjpk }
273*1676Sjpk 
274*1676Sjpk /* ARGSUSED */
275*1676Sjpk static uint_t
276*1676Sjpk gcdb_hash_by_secattr(void *hash_data, mod_hash_key_t key)
277*1676Sjpk {
278*1676Sjpk 	const struct rtsa_s *rp = (struct rtsa_s *)key;
279*1676Sjpk 	const uint32_t *up, *ue;
280*1676Sjpk 	uint_t hash;
281*1676Sjpk 	int i;
282*1676Sjpk 
283*1676Sjpk 	ASSERT(rp != NULL);
284*1676Sjpk 
285*1676Sjpk 	/* See comments in hash_bylabel in zone.c for details */
286*1676Sjpk 	hash = rp->rtsa_doi + (rp->rtsa_doi << 1);
287*1676Sjpk 	up = (const uint32_t *)&rp->rtsa_slrange;
288*1676Sjpk 	ue = up + sizeof (rp->rtsa_slrange) / sizeof (*up);
289*1676Sjpk 	i = 1;
290*1676Sjpk 	while (up < ue) {
291*1676Sjpk 		/* using 2^n + 1, 1 <= n <= 16 as source of many primes */
292*1676Sjpk 		hash += *up + (*up << ((i % 16) + 1));
293*1676Sjpk 		up++;
294*1676Sjpk 		i++;
295*1676Sjpk 	}
296*1676Sjpk 	return (hash);
297*1676Sjpk }
298*1676Sjpk 
299*1676Sjpk static int
300*1676Sjpk gcdb_hash_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
301*1676Sjpk {
302*1676Sjpk 	struct rtsa_s *rp1 = (struct rtsa_s *)key1;
303*1676Sjpk 	struct rtsa_s *rp2 = (struct rtsa_s *)key2;
304*1676Sjpk 
305*1676Sjpk 	ASSERT(rp1 != NULL && rp2 != NULL);
306*1676Sjpk 
307*1676Sjpk 	if (blequal(&rp1->rtsa_slrange.lower_bound,
308*1676Sjpk 	    &rp2->rtsa_slrange.lower_bound) &&
309*1676Sjpk 	    blequal(&rp1->rtsa_slrange.upper_bound,
310*1676Sjpk 	    &rp2->rtsa_slrange.upper_bound) &&
311*1676Sjpk 	    rp1->rtsa_doi == rp2->rtsa_doi)
312*1676Sjpk 		return (0);
313*1676Sjpk 
314*1676Sjpk 	/* No match; not found */
315*1676Sjpk 	return (-1);
316*1676Sjpk }
317*1676Sjpk 
318*1676Sjpk /* ARGSUSED */
319*1676Sjpk static uint_t
320*1676Sjpk gcgrp_hash_by_addr(void *hash_data, mod_hash_key_t key)
321*1676Sjpk {
322*1676Sjpk 	tsol_gcgrp_addr_t *ga = (tsol_gcgrp_addr_t *)key;
323*1676Sjpk 	uint_t		idx = 0;
324*1676Sjpk 	uint32_t	*ap;
325*1676Sjpk 
326*1676Sjpk 	ASSERT(ga != NULL);
327*1676Sjpk 	ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6);
328*1676Sjpk 
329*1676Sjpk 	ap = (uint32_t *)&ga->ga_addr.s6_addr32[0];
330*1676Sjpk 	idx ^= *ap++;
331*1676Sjpk 	idx ^= *ap++;
332*1676Sjpk 	idx ^= *ap++;
333*1676Sjpk 	idx ^= *ap;
334*1676Sjpk 
335*1676Sjpk 	return (idx);
336*1676Sjpk }
337*1676Sjpk 
338*1676Sjpk static int
339*1676Sjpk gcgrp_hash_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
340*1676Sjpk {
341*1676Sjpk 	tsol_gcgrp_addr_t *ga1 = (tsol_gcgrp_addr_t *)key1;
342*1676Sjpk 	tsol_gcgrp_addr_t *ga2 = (tsol_gcgrp_addr_t *)key2;
343*1676Sjpk 
344*1676Sjpk 	ASSERT(ga1 != NULL && ga2 != NULL);
345*1676Sjpk 
346*1676Sjpk 	/* Address family must match */
347*1676Sjpk 	if (ga1->ga_af != ga2->ga_af)
348*1676Sjpk 		return (-1);
349*1676Sjpk 
350*1676Sjpk 	if (ga1->ga_addr.s6_addr32[0] == ga2->ga_addr.s6_addr32[0] &&
351*1676Sjpk 	    ga1->ga_addr.s6_addr32[1] == ga2->ga_addr.s6_addr32[1] &&
352*1676Sjpk 	    ga1->ga_addr.s6_addr32[2] == ga2->ga_addr.s6_addr32[2] &&
353*1676Sjpk 	    ga1->ga_addr.s6_addr32[3] == ga2->ga_addr.s6_addr32[3])
354*1676Sjpk 		return (0);
355*1676Sjpk 
356*1676Sjpk 	/* No match; not found */
357*1676Sjpk 	return (-1);
358*1676Sjpk }
359*1676Sjpk 
360*1676Sjpk #define	RTSAFLAGS	"\20\11cipso\3doi\2max_sl\1min_sl"
361*1676Sjpk 
362*1676Sjpk int
363*1676Sjpk rtsa_validate(const struct rtsa_s *rp)
364*1676Sjpk {
365*1676Sjpk 	uint32_t mask = rp->rtsa_mask;
366*1676Sjpk 
367*1676Sjpk 	/* RTSA_CIPSO must be set, and DOI must not be zero */
368*1676Sjpk 	if ((mask & RTSA_CIPSO) == 0 || rp->rtsa_doi == 0) {
369*1676Sjpk 		DTRACE_PROBE2(tx__gcdb__log__error__rtsa__validate, char *,
370*1676Sjpk 		    "rtsa(1) lacks flag or has 0 doi.",
371*1676Sjpk 		    rtsa_s *, rp);
372*1676Sjpk 		return (EINVAL);
373*1676Sjpk 	}
374*1676Sjpk 	/*
375*1676Sjpk 	 * SL range must be specified, and it must have its
376*1676Sjpk 	 * upper bound dominating its lower bound.
377*1676Sjpk 	 */
378*1676Sjpk 	if ((mask & RTSA_SLRANGE) != RTSA_SLRANGE ||
379*1676Sjpk 	    !bldominates(&rp->rtsa_slrange.upper_bound,
380*1676Sjpk 	    &rp->rtsa_slrange.lower_bound)) {
381*1676Sjpk 		DTRACE_PROBE2(tx__gcdb__log__error__rtsa__validate, char *,
382*1676Sjpk 		    "rtsa(1) min_sl and max_sl not set or max_sl is "
383*1676Sjpk 		    "not dominating.", rtsa_s *, rp);
384*1676Sjpk 		return (EINVAL);
385*1676Sjpk 	}
386*1676Sjpk 	return (0);
387*1676Sjpk }
388*1676Sjpk 
389*1676Sjpk /*
390*1676Sjpk  * A brief explanation of the reference counting scheme:
391*1676Sjpk  *
392*1676Sjpk  * Prefix IREs have a non-NULL igsa_gc and a NULL igsa_gcgrp;
393*1676Sjpk  * IRE_CACHEs have it vice-versa.
394*1676Sjpk  *
395*1676Sjpk  * Apart from dynamic references due to reference holds done
396*1676Sjpk  * actively by threads, we have the following references:
397*1676Sjpk  *
398*1676Sjpk  * gcdb_refcnt:
399*1676Sjpk  *	- Every tsol_gc_t pointing to a tsol_gcdb_t contributes a reference
400*1676Sjpk  *	  to the gcdb_refcnt.
401*1676Sjpk  *
402*1676Sjpk  * gc_refcnt:
403*1676Sjpk  *	- A prefix IRE that points to an igsa_gc contributes a reference
404*1676Sjpk  *	  to the gc_refcnt.
405*1676Sjpk  *
406*1676Sjpk  * gcgrp_refcnt:
407*1676Sjpk  *	- An IRE_CACHE that points to an igsa_gcgrp contributes a reference
408*1676Sjpk  *	  to the gcgrp_refcnt of the associated tsol_gcgrp_t.
409*1676Sjpk  *	- Every tsol_gc_t in the chain headed by tsol_gcgrp_t contributes
410*1676Sjpk  *	  a reference to the gcgrp_refcnt.
411*1676Sjpk  */
412*1676Sjpk static tsol_gcdb_t *
413*1676Sjpk gcdb_lookup(struct rtsa_s *rp, boolean_t alloc)
414*1676Sjpk {
415*1676Sjpk 	tsol_gcdb_t *gcdb = NULL;
416*1676Sjpk 
417*1676Sjpk 	if (rtsa_validate(rp) != 0)
418*1676Sjpk 		return (NULL);
419*1676Sjpk 
420*1676Sjpk 	mutex_enter(&gcdb_lock);
421*1676Sjpk 	/* Find a copy in the cache; otherwise, create one and cache it */
422*1676Sjpk 	if (mod_hash_find(gcdb_hash, (mod_hash_key_t)rp,
423*1676Sjpk 	    (mod_hash_val_t *)&gcdb) == 0) {
424*1676Sjpk 		gcdb->gcdb_refcnt++;
425*1676Sjpk 		ASSERT(gcdb->gcdb_refcnt != 0);
426*1676Sjpk 
427*1676Sjpk 		DTRACE_PROBE2(tx__gcdb__log__info__gcdb__lookup, char *,
428*1676Sjpk 		    "gcdb(1) is in gcdb_hash(global)", tsol_gcdb_t *, gcdb);
429*1676Sjpk 	} else if (alloc) {
430*1676Sjpk 		gcdb = kmem_zalloc(sizeof (*gcdb), KM_NOSLEEP);
431*1676Sjpk 		if (gcdb != NULL) {
432*1676Sjpk 			gcdb->gcdb_refcnt = 1;
433*1676Sjpk 			gcdb->gcdb_mask = rp->rtsa_mask;
434*1676Sjpk 			gcdb->gcdb_doi = rp->rtsa_doi;
435*1676Sjpk 			gcdb->gcdb_slrange = rp->rtsa_slrange;
436*1676Sjpk 
437*1676Sjpk 			if (mod_hash_insert(gcdb_hash,
438*1676Sjpk 			    (mod_hash_key_t)&gcdb->gcdb_attr,
439*1676Sjpk 			    (mod_hash_val_t)gcdb) != 0) {
440*1676Sjpk 				mutex_exit(&gcdb_lock);
441*1676Sjpk 				kmem_free(gcdb, sizeof (*gcdb));
442*1676Sjpk 				return (NULL);
443*1676Sjpk 			}
444*1676Sjpk 
445*1676Sjpk 			DTRACE_PROBE2(tx__gcdb__log__info__gcdb__insert, char *,
446*1676Sjpk 			    "gcdb(1) inserted in gcdb_hash(global)",
447*1676Sjpk 			    tsol_gcdb_t *, gcdb);
448*1676Sjpk 		}
449*1676Sjpk 	}
450*1676Sjpk 	mutex_exit(&gcdb_lock);
451*1676Sjpk 	return (gcdb);
452*1676Sjpk }
453*1676Sjpk 
454*1676Sjpk static void
455*1676Sjpk gcdb_inactive(tsol_gcdb_t *gcdb)
456*1676Sjpk {
457*1676Sjpk 	ASSERT(MUTEX_HELD(&gcdb_lock));
458*1676Sjpk 	ASSERT(gcdb != NULL && gcdb->gcdb_refcnt == 0);
459*1676Sjpk 
460*1676Sjpk 	(void) mod_hash_remove(gcdb_hash, (mod_hash_key_t)&gcdb->gcdb_attr,
461*1676Sjpk 	    (mod_hash_val_t *)&gcdb);
462*1676Sjpk 
463*1676Sjpk 	DTRACE_PROBE2(tx__gcdb__log__info__gcdb__remove, char *,
464*1676Sjpk 	    "gcdb(1) removed from gcdb_hash(global)",
465*1676Sjpk 	    tsol_gcdb_t *, gcdb);
466*1676Sjpk 	kmem_free(gcdb, sizeof (*gcdb));
467*1676Sjpk }
468*1676Sjpk 
469*1676Sjpk tsol_gc_t *
470*1676Sjpk gc_create(struct rtsa_s *rp, tsol_gcgrp_t *gcgrp, boolean_t *gcgrp_xtrarefp)
471*1676Sjpk {
472*1676Sjpk 	tsol_gc_t *gc;
473*1676Sjpk 	tsol_gcdb_t *gcdb;
474*1676Sjpk 
475*1676Sjpk 	*gcgrp_xtrarefp = B_TRUE;
476*1676Sjpk 
477*1676Sjpk 	rw_enter(&gcgrp->gcgrp_rwlock, RW_WRITER);
478*1676Sjpk 	if ((gcdb = gcdb_lookup(rp, B_TRUE)) == NULL) {
479*1676Sjpk 		rw_exit(&gcgrp->gcgrp_rwlock);
480*1676Sjpk 		return (NULL);
481*1676Sjpk 	}
482*1676Sjpk 
483*1676Sjpk 	for (gc = gcgrp->gcgrp_head; gc != NULL; gc = gc->gc_next) {
484*1676Sjpk 		if (gc->gc_db == gcdb) {
485*1676Sjpk 			ASSERT(gc->gc_grp == gcgrp);
486*1676Sjpk 
487*1676Sjpk 			gc->gc_refcnt++;
488*1676Sjpk 			ASSERT(gc->gc_refcnt != 0);
489*1676Sjpk 
490*1676Sjpk 			GCDB_REFRELE(gcdb);
491*1676Sjpk 
492*1676Sjpk 			DTRACE_PROBE3(tx__gcdb__log__info__gc__create,
493*1676Sjpk 			    char *, "found gc(1) in gcgrp(2)",
494*1676Sjpk 			    tsol_gc_t *, gc, tsol_gcgrp_t *, gcgrp);
495*1676Sjpk 			rw_exit(&gcgrp->gcgrp_rwlock);
496*1676Sjpk 			return (gc);
497*1676Sjpk 		}
498*1676Sjpk 	}
499*1676Sjpk 
500*1676Sjpk 	gc = kmem_zalloc(sizeof (*gc), KM_NOSLEEP);
501*1676Sjpk 	if (gc != NULL) {
502*1676Sjpk 		if (gcgrp->gcgrp_head == NULL) {
503*1676Sjpk 			gcgrp->gcgrp_head = gcgrp->gcgrp_tail = gc;
504*1676Sjpk 		} else {
505*1676Sjpk 			gcgrp->gcgrp_tail->gc_next = gc;
506*1676Sjpk 			gc->gc_prev = gcgrp->gcgrp_tail;
507*1676Sjpk 			gcgrp->gcgrp_tail = gc;
508*1676Sjpk 		}
509*1676Sjpk 		gcgrp->gcgrp_count++;
510*1676Sjpk 		ASSERT(gcgrp->gcgrp_count != 0);
511*1676Sjpk 
512*1676Sjpk 		/* caller has incremented gcgrp reference for us */
513*1676Sjpk 		gc->gc_grp = gcgrp;
514*1676Sjpk 
515*1676Sjpk 		gc->gc_db = gcdb;
516*1676Sjpk 		gc->gc_refcnt = 1;
517*1676Sjpk 
518*1676Sjpk 		DTRACE_PROBE3(tx__gcdb__log__info__gc__create, char *,
519*1676Sjpk 		    "added gc(1) to gcgrp(2)", tsol_gc_t *, gc,
520*1676Sjpk 		    tsol_gcgrp_t *, gcgrp);
521*1676Sjpk 
522*1676Sjpk 		*gcgrp_xtrarefp = B_FALSE;
523*1676Sjpk 	}
524*1676Sjpk 	rw_exit(&gcgrp->gcgrp_rwlock);
525*1676Sjpk 
526*1676Sjpk 	return (gc);
527*1676Sjpk }
528*1676Sjpk 
529*1676Sjpk void
530*1676Sjpk gc_inactive(tsol_gc_t *gc)
531*1676Sjpk {
532*1676Sjpk 	tsol_gcgrp_t *gcgrp = gc->gc_grp;
533*1676Sjpk 
534*1676Sjpk 	ASSERT(gcgrp != NULL);
535*1676Sjpk 	ASSERT(RW_WRITE_HELD(&gcgrp->gcgrp_rwlock));
536*1676Sjpk 	ASSERT(gc->gc_refcnt == 0);
537*1676Sjpk 
538*1676Sjpk 	if (gc->gc_prev != NULL)
539*1676Sjpk 		gc->gc_prev->gc_next = gc->gc_next;
540*1676Sjpk 	else
541*1676Sjpk 		gcgrp->gcgrp_head = gc->gc_next;
542*1676Sjpk 	if (gc->gc_next != NULL)
543*1676Sjpk 		gc->gc_next->gc_prev = gc->gc_prev;
544*1676Sjpk 	else
545*1676Sjpk 		gcgrp->gcgrp_tail = gc->gc_prev;
546*1676Sjpk 	ASSERT(gcgrp->gcgrp_count > 0);
547*1676Sjpk 	gcgrp->gcgrp_count--;
548*1676Sjpk 
549*1676Sjpk 	/* drop lock before it's destroyed */
550*1676Sjpk 	rw_exit(&gcgrp->gcgrp_rwlock);
551*1676Sjpk 
552*1676Sjpk 	DTRACE_PROBE3(tx__gcdb__log__info__gc__remove, char *,
553*1676Sjpk 	    "removed inactive gc(1) from gcgrp(2)",
554*1676Sjpk 	    tsol_gc_t *, gc, tsol_gcgrp_t *, gcgrp);
555*1676Sjpk 
556*1676Sjpk 	GCGRP_REFRELE(gcgrp);
557*1676Sjpk 
558*1676Sjpk 	gc->gc_grp = NULL;
559*1676Sjpk 	gc->gc_prev = gc->gc_next = NULL;
560*1676Sjpk 
561*1676Sjpk 	if (gc->gc_db != NULL)
562*1676Sjpk 		GCDB_REFRELE(gc->gc_db);
563*1676Sjpk 
564*1676Sjpk 	kmem_free(gc, sizeof (*gc));
565*1676Sjpk }
566*1676Sjpk 
567*1676Sjpk tsol_gcgrp_t *
568*1676Sjpk gcgrp_lookup(tsol_gcgrp_addr_t *ga, boolean_t alloc)
569*1676Sjpk {
570*1676Sjpk 	tsol_gcgrp_t *gcgrp = NULL;
571*1676Sjpk 	mod_hash_t *hashp;
572*1676Sjpk 
573*1676Sjpk 	ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6);
574*1676Sjpk 
575*1676Sjpk 	hashp = (ga->ga_af == AF_INET) ? gcgrp4_hash : gcgrp6_hash;
576*1676Sjpk 
577*1676Sjpk 	mutex_enter(&gcgrp_lock);
578*1676Sjpk 	if (mod_hash_find(hashp, (mod_hash_key_t)ga,
579*1676Sjpk 	    (mod_hash_val_t *)&gcgrp) == 0) {
580*1676Sjpk 		gcgrp->gcgrp_refcnt++;
581*1676Sjpk 		ASSERT(gcgrp->gcgrp_refcnt != 0);
582*1676Sjpk 
583*1676Sjpk 		DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__lookup, char *,
584*1676Sjpk 		    "found gcgrp(1) in hash(2)", tsol_gcgrp_t *, gcgrp,
585*1676Sjpk 		    mod_hash_t *, hashp);
586*1676Sjpk 
587*1676Sjpk 	} else if (alloc) {
588*1676Sjpk 		gcgrp = kmem_zalloc(sizeof (*gcgrp), KM_NOSLEEP);
589*1676Sjpk 		if (gcgrp != NULL) {
590*1676Sjpk 			gcgrp->gcgrp_refcnt = 1;
591*1676Sjpk 			rw_init(&gcgrp->gcgrp_rwlock, NULL, RW_DEFAULT, NULL);
592*1676Sjpk 			bcopy(ga, &gcgrp->gcgrp_addr, sizeof (*ga));
593*1676Sjpk 
594*1676Sjpk 			if (mod_hash_insert(hashp,
595*1676Sjpk 			    (mod_hash_key_t)&gcgrp->gcgrp_addr,
596*1676Sjpk 			    (mod_hash_val_t)gcgrp) != 0) {
597*1676Sjpk 				mutex_exit(&gcgrp_lock);
598*1676Sjpk 				kmem_free(gcgrp, sizeof (*gcgrp));
599*1676Sjpk 				return (NULL);
600*1676Sjpk 			}
601*1676Sjpk 
602*1676Sjpk 			DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__insert,
603*1676Sjpk 			    char *, "inserted gcgrp(1) in hash(2)",
604*1676Sjpk 			    tsol_gcgrp_t *, gcgrp, mod_hash_t *, hashp);
605*1676Sjpk 		}
606*1676Sjpk 	}
607*1676Sjpk 	mutex_exit(&gcgrp_lock);
608*1676Sjpk 	return (gcgrp);
609*1676Sjpk }
610*1676Sjpk 
611*1676Sjpk void
612*1676Sjpk gcgrp_inactive(tsol_gcgrp_t *gcgrp)
613*1676Sjpk {
614*1676Sjpk 	tsol_gcgrp_addr_t *ga;
615*1676Sjpk 	mod_hash_t *hashp;
616*1676Sjpk 
617*1676Sjpk 	ASSERT(MUTEX_HELD(&gcgrp_lock));
618*1676Sjpk 	ASSERT(!RW_LOCK_HELD(&gcgrp->gcgrp_rwlock));
619*1676Sjpk 	ASSERT(gcgrp != NULL && gcgrp->gcgrp_refcnt == 0);
620*1676Sjpk 	ASSERT(gcgrp->gcgrp_head == NULL && gcgrp->gcgrp_count == 0);
621*1676Sjpk 
622*1676Sjpk 	ga = &gcgrp->gcgrp_addr;
623*1676Sjpk 	ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6);
624*1676Sjpk 
625*1676Sjpk 	hashp = (ga->ga_af == AF_INET) ? gcgrp4_hash : gcgrp6_hash;
626*1676Sjpk 	(void) mod_hash_remove(hashp, (mod_hash_key_t)ga,
627*1676Sjpk 	    (mod_hash_val_t *)&gcgrp);
628*1676Sjpk 	rw_destroy(&gcgrp->gcgrp_rwlock);
629*1676Sjpk 
630*1676Sjpk 	DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__remove, char *,
631*1676Sjpk 	    "removed inactive gcgrp(1) from hash(2)",
632*1676Sjpk 	    tsol_gcgrp_t *, gcgrp, mod_hash_t *, hashp);
633*1676Sjpk 
634*1676Sjpk 	kmem_free(gcgrp, sizeof (*gcgrp));
635*1676Sjpk }
636*1676Sjpk 
637*1676Sjpk /*
638*1676Sjpk  * Converts CIPSO option to sensitivity label.
639*1676Sjpk  * Validity checks based on restrictions defined in
640*1676Sjpk  * COMMERCIAL IP SECURITY OPTION (CIPSO 2.2) (draft-ietf-cipso-ipsecurity)
641*1676Sjpk  */
642*1676Sjpk static boolean_t
643*1676Sjpk cipso_to_sl(const uchar_t *option, bslabel_t *sl)
644*1676Sjpk {
645*1676Sjpk 	const struct cipso_option *co = (const struct cipso_option *)option;
646*1676Sjpk 	const struct cipso_tag_type_1 *tt1;
647*1676Sjpk 
648*1676Sjpk 	tt1 = (struct cipso_tag_type_1 *)&co->cipso_tag_type[0];
649*1676Sjpk 	if (tt1->tag_type != 1 ||
650*1676Sjpk 	    tt1->tag_length < TSOL_TT1_MIN_LENGTH ||
651*1676Sjpk 	    tt1->tag_length > TSOL_TT1_MAX_LENGTH ||
652*1676Sjpk 	    tt1->tag_length + TSOL_CIPSO_TAG_OFFSET > co->cipso_length)
653*1676Sjpk 		return (B_FALSE);
654*1676Sjpk 
655*1676Sjpk 	bsllow(sl);	/* assumed: sets compartments to all zeroes */
656*1676Sjpk 	LCLASS_SET((_bslabel_impl_t *)sl, tt1->tag_sl);
657*1676Sjpk 	bcopy(tt1->tag_cat, &((_bslabel_impl_t *)sl)->compartments,
658*1676Sjpk 	    tt1->tag_length - TSOL_TT1_MIN_LENGTH);
659*1676Sjpk 	return (B_TRUE);
660*1676Sjpk }
661*1676Sjpk 
662*1676Sjpk /*
663*1676Sjpk  * Parse the CIPSO label in the incoming packet and construct a ts_label_t
664*1676Sjpk  * that reflects the CIPSO label and attach it to the dblk cred. Later as
665*1676Sjpk  * the mblk flows up through the stack any code that needs to examine the
666*1676Sjpk  * packet label can inspect the label from the dblk cred. This function is
667*1676Sjpk  * called right in ip_rput for all packets, i.e. locally destined and
668*1676Sjpk  * to be forwarded packets. The forwarding path needs to examine the label
669*1676Sjpk  * to determine how to forward the packet.
670*1676Sjpk  *
671*1676Sjpk  * For IPv4, IP header options have been pulled up, but other headers might not
672*1676Sjpk  * have been.  For IPv6, any hop-by-hop options have been pulled up, but any
673*1676Sjpk  * other headers might not be present.
674*1676Sjpk  */
675*1676Sjpk boolean_t
676*1676Sjpk tsol_get_pkt_label(mblk_t *mp, int version)
677*1676Sjpk {
678*1676Sjpk 	tsol_tpc_t	*src_rhtp;
679*1676Sjpk 	uchar_t		*opt_ptr = NULL;
680*1676Sjpk 	const ipha_t	*ipha;
681*1676Sjpk 	bslabel_t	sl;
682*1676Sjpk 	uint32_t	doi;
683*1676Sjpk 	tsol_ip_label_t	label_type;
684*1676Sjpk 	const cipso_option_t *co;
685*1676Sjpk 	const void	*src;
686*1676Sjpk 	const ip6_t	*ip6h;
687*1676Sjpk 
688*1676Sjpk 	ASSERT(DB_TYPE(mp) == M_DATA);
689*1676Sjpk 
690*1676Sjpk 	if (version == IPV4_VERSION) {
691*1676Sjpk 		ipha = (const ipha_t *)mp->b_rptr;
692*1676Sjpk 		src = &ipha->ipha_src;
693*1676Sjpk 		label_type = tsol_get_option(mp, &opt_ptr);
694*1676Sjpk 	} else {
695*1676Sjpk 		uchar_t		*after_secopt;
696*1676Sjpk 		boolean_t	hbh_needed;
697*1676Sjpk 		const uchar_t	*ip6hbh;
698*1676Sjpk 		size_t		optlen;
699*1676Sjpk 
700*1676Sjpk 		label_type = OPT_NONE;
701*1676Sjpk 		ip6h = (const ip6_t *)mp->b_rptr;
702*1676Sjpk 		src = &ip6h->ip6_src;
703*1676Sjpk 		if (ip6h->ip6_nxt == IPPROTO_HOPOPTS) {
704*1676Sjpk 			ip6hbh = (const uchar_t *)&ip6h[1];
705*1676Sjpk 			optlen = (ip6hbh[1] + 1) << 3;
706*1676Sjpk 			ASSERT(ip6hbh + optlen <= mp->b_wptr);
707*1676Sjpk 			opt_ptr = tsol_find_secopt_v6(ip6hbh, optlen,
708*1676Sjpk 			    &after_secopt, &hbh_needed);
709*1676Sjpk 			/* tsol_find_secopt_v6 guarantees some sanity */
710*1676Sjpk 			if (opt_ptr != NULL &&
711*1676Sjpk 			    (optlen = opt_ptr[1]) >= 8) {
712*1676Sjpk 				opt_ptr += 2;
713*1676Sjpk 				bcopy(opt_ptr, &doi, sizeof (doi));
714*1676Sjpk 				doi = ntohl(doi);
715*1676Sjpk 				if (doi == IP6LS_DOI_V4 &&
716*1676Sjpk 				    opt_ptr[4] == IP6LS_TT_V4 &&
717*1676Sjpk 				    opt_ptr[5] <= optlen - 4 &&
718*1676Sjpk 				    opt_ptr[7] <= optlen - 6) {
719*1676Sjpk 					opt_ptr += sizeof (doi) + 2;
720*1676Sjpk 					label_type = OPT_CIPSO;
721*1676Sjpk 				}
722*1676Sjpk 			}
723*1676Sjpk 		}
724*1676Sjpk 	}
725*1676Sjpk 
726*1676Sjpk 	switch (label_type) {
727*1676Sjpk 	case OPT_CIPSO:
728*1676Sjpk 		/*
729*1676Sjpk 		 * Convert the CIPSO label to the internal format
730*1676Sjpk 		 * and attach it to the dblk cred.
731*1676Sjpk 		 * Validity checks based on restrictions defined in
732*1676Sjpk 		 * COMMERCIAL IP SECURITY OPTION (CIPSO 2.2)
733*1676Sjpk 		 * (draft-ietf-cipso-ipsecurity)
734*1676Sjpk 		 */
735*1676Sjpk 		if (version == IPV6_VERSION && ip6opt_ls == 0)
736*1676Sjpk 			return (B_FALSE);
737*1676Sjpk 		co = (const struct cipso_option *)opt_ptr;
738*1676Sjpk 		if ((co->cipso_length <
739*1676Sjpk 		    TSOL_CIPSO_TAG_OFFSET + TSOL_TT1_MIN_LENGTH) ||
740*1676Sjpk 		    (co->cipso_length > IP_MAX_OPT_LENGTH))
741*1676Sjpk 			return (B_FALSE);
742*1676Sjpk 		bcopy(co->cipso_doi, &doi, sizeof (doi));
743*1676Sjpk 		doi = ntohl(doi);
744*1676Sjpk 		if (!cipso_to_sl(opt_ptr, &sl))
745*1676Sjpk 			return (B_FALSE);
746*1676Sjpk 		setbltype(&sl, SUN_SL_ID);
747*1676Sjpk 		break;
748*1676Sjpk 
749*1676Sjpk 	case OPT_NONE:
750*1676Sjpk 		/*
751*1676Sjpk 		 * Handle special cases that are not currently labeled, even
752*1676Sjpk 		 * though the sending system may otherwise be configured as
753*1676Sjpk 		 * labeled.
754*1676Sjpk 		 *	- IGMP
755*1676Sjpk 		 *	- IPv4 ICMP Router Discovery
756*1676Sjpk 		 *	- IPv6 Neighbor Discovery
757*1676Sjpk 		 */
758*1676Sjpk 		if (version == IPV4_VERSION) {
759*1676Sjpk 			if (ipha->ipha_protocol == IPPROTO_IGMP)
760*1676Sjpk 				return (B_TRUE);
761*1676Sjpk 			if (ipha->ipha_protocol == IPPROTO_ICMP) {
762*1676Sjpk 				const struct icmp *icmp = (const struct icmp *)
763*1676Sjpk 				    (mp->b_rptr + IPH_HDR_LENGTH(ipha));
764*1676Sjpk 
765*1676Sjpk 				if ((uchar_t *)icmp > mp->b_wptr) {
766*1676Sjpk 					if (!pullupmsg(mp,
767*1676Sjpk 					    (uchar_t *)icmp - mp->b_rptr + 1))
768*1676Sjpk 						return (B_FALSE);
769*1676Sjpk 					icmp = (const struct icmp *)
770*1676Sjpk 					    (mp->b_rptr +
771*1676Sjpk 					    IPH_HDR_LENGTH(ipha));
772*1676Sjpk 				}
773*1676Sjpk 				if (icmp->icmp_type == ICMP_ROUTERADVERT ||
774*1676Sjpk 				    icmp->icmp_type == ICMP_ROUTERSOLICIT)
775*1676Sjpk 					return (B_TRUE);
776*1676Sjpk 			}
777*1676Sjpk 			src = &ipha->ipha_src;
778*1676Sjpk 		} else {
779*1676Sjpk 			if (ip6h->ip6_nxt == IPPROTO_ICMPV6) {
780*1676Sjpk 				const icmp6_t *icmp6 = (const icmp6_t *)
781*1676Sjpk 				    (mp->b_rptr + IPV6_HDR_LEN);
782*1676Sjpk 
783*1676Sjpk 				if ((uchar_t *)icmp6 + ICMP6_MINLEN >
784*1676Sjpk 				    mp->b_wptr) {
785*1676Sjpk 					if (!pullupmsg(mp,
786*1676Sjpk 					    (uchar_t *)icmp6 - mp->b_rptr +
787*1676Sjpk 					    ICMP6_MINLEN))
788*1676Sjpk 						return (B_FALSE);
789*1676Sjpk 					icmp6 = (const icmp6_t *)
790*1676Sjpk 					    (mp->b_rptr + IPV6_HDR_LEN);
791*1676Sjpk 				}
792*1676Sjpk 				if (icmp6->icmp6_type >= MLD_LISTENER_QUERY &&
793*1676Sjpk 				    icmp6->icmp6_type <= ICMP6_MAX_INFO_TYPE)
794*1676Sjpk 					return (B_TRUE);
795*1676Sjpk 			}
796*1676Sjpk 			src = &ip6h->ip6_src;
797*1676Sjpk 		}
798*1676Sjpk 
799*1676Sjpk 		/*
800*1676Sjpk 		 * Look up the tnrhtp database and get the implicit label
801*1676Sjpk 		 * that is associated with this unlabeled host and attach
802*1676Sjpk 		 * it to the packet.
803*1676Sjpk 		 */
804*1676Sjpk 		if ((src_rhtp = find_tpc(src, version, B_FALSE)) == NULL)
805*1676Sjpk 			return (B_FALSE);
806*1676Sjpk 
807*1676Sjpk 		/* If the sender is labeled, drop the unlabeled packet. */
808*1676Sjpk 		if (src_rhtp->tpc_tp.host_type != UNLABELED) {
809*1676Sjpk 			TPC_RELE(src_rhtp);
810*1676Sjpk 			pr_addr_dbg("unlabeled packet forged from %s\n",
811*1676Sjpk 			    version == IPV4_VERSION ? AF_INET : AF_INET6, src);
812*1676Sjpk 			return (B_FALSE);
813*1676Sjpk 		}
814*1676Sjpk 
815*1676Sjpk 		sl = src_rhtp->tpc_tp.tp_def_label;
816*1676Sjpk 		setbltype(&sl, SUN_SL_ID);
817*1676Sjpk 		doi = src_rhtp->tpc_tp.tp_doi;
818*1676Sjpk 		TPC_RELE(src_rhtp);
819*1676Sjpk 		break;
820*1676Sjpk 
821*1676Sjpk 	default:
822*1676Sjpk 		return (B_FALSE);
823*1676Sjpk 	}
824*1676Sjpk 
825*1676Sjpk 	/* Make sure no other thread is messing with this mblk */
826*1676Sjpk 	ASSERT(DB_REF(mp) == 1);
827*1676Sjpk 	if (DB_CRED(mp) == NULL) {
828*1676Sjpk 		DB_CRED(mp) = newcred_from_bslabel(&sl, doi, KM_NOSLEEP);
829*1676Sjpk 		if (DB_CRED(mp) == NULL)
830*1676Sjpk 			return (B_FALSE);
831*1676Sjpk 	} else {
832*1676Sjpk 		cred_t	*newcr;
833*1676Sjpk 
834*1676Sjpk 		newcr = copycred_from_bslabel(DB_CRED(mp), &sl, doi,
835*1676Sjpk 		    KM_NOSLEEP);
836*1676Sjpk 		if (newcr == NULL)
837*1676Sjpk 			return (B_FALSE);
838*1676Sjpk 		crfree(DB_CRED(mp));
839*1676Sjpk 		DB_CRED(mp) = newcr;
840*1676Sjpk 	}
841*1676Sjpk 
842*1676Sjpk 	/*
843*1676Sjpk 	 * If the source was unlabeled, then flag as such,
844*1676Sjpk 	 * while remembering that CIPSO routers add headers.
845*1676Sjpk 	 */
846*1676Sjpk 	if (label_type == OPT_NONE)
847*1676Sjpk 		crgetlabel(DB_CRED(mp))->tsl_flags |= TSLF_UNLABELED;
848*1676Sjpk 	else if (label_type == OPT_CIPSO) {
849*1676Sjpk 		if ((src_rhtp = find_tpc(src, version, B_FALSE)) == NULL)
850*1676Sjpk 			return (B_FALSE);
851*1676Sjpk 		if (src_rhtp->tpc_tp.host_type == UNLABELED)
852*1676Sjpk 		    crgetlabel(DB_CRED(mp))->tsl_flags |=
853*1676Sjpk 		    TSLF_UNLABELED;
854*1676Sjpk 		TPC_RELE(src_rhtp);
855*1676Sjpk 	}
856*1676Sjpk 
857*1676Sjpk 	return (B_TRUE);
858*1676Sjpk }
859*1676Sjpk 
860*1676Sjpk /*
861*1676Sjpk  * This routine determines whether the given packet should be accepted locally.
862*1676Sjpk  * It does a range/set check on the packet's label by looking up the given
863*1676Sjpk  * address in the remote host database.
864*1676Sjpk  */
865*1676Sjpk boolean_t
866*1676Sjpk tsol_receive_local(const mblk_t *mp, const void *addr, uchar_t version,
867*1676Sjpk     boolean_t shared_addr, const conn_t *connp)
868*1676Sjpk {
869*1676Sjpk 	const cred_t *credp;
870*1676Sjpk 	ts_label_t *plabel, *conn_plabel;
871*1676Sjpk 	tsol_tpc_t *tp;
872*1676Sjpk 	boolean_t retv;
873*1676Sjpk 	const bslabel_t *label, *conn_label;
874*1676Sjpk 
875*1676Sjpk 	/*
876*1676Sjpk 	 * The cases in which this can happen are:
877*1676Sjpk 	 *	- IPv6 Router Alert, where ip_rput_data_v6 deliberately skips
878*1676Sjpk 	 *	  over the label attachment process.
879*1676Sjpk 	 *	- MLD output looped-back to ourselves.
880*1676Sjpk 	 *	- IPv4 Router Discovery, where tsol_get_pkt_label intentionally
881*1676Sjpk 	 *	  avoids the labeling process.
882*1676Sjpk 	 * We trust that all valid paths in the code set the cred pointer when
883*1676Sjpk 	 * needed.
884*1676Sjpk 	 */
885*1676Sjpk 	if ((credp = DB_CRED(mp)) == NULL)
886*1676Sjpk 		return (B_TRUE);
887*1676Sjpk 
888*1676Sjpk 	/*
889*1676Sjpk 	 * If this packet is from the inside (not a remote host) and has the
890*1676Sjpk 	 * same zoneid as the selected destination, then no checks are
891*1676Sjpk 	 * necessary.  Membership in the zone is enough proof.  This is
892*1676Sjpk 	 * intended to be a hot path through this function.
893*1676Sjpk 	 */
894*1676Sjpk 	if (!crisremote(credp) &&
895*1676Sjpk 	    crgetzone(credp) == crgetzone(connp->conn_cred))
896*1676Sjpk 		return (B_TRUE);
897*1676Sjpk 
898*1676Sjpk 	plabel = crgetlabel(credp);
899*1676Sjpk 	conn_plabel = crgetlabel(connp->conn_cred);
900*1676Sjpk 	ASSERT(plabel != NULL && conn_plabel != NULL);
901*1676Sjpk 
902*1676Sjpk 	label = label2bslabel(plabel);
903*1676Sjpk 	conn_label = label2bslabel(crgetlabel(connp->conn_cred));
904*1676Sjpk 
905*1676Sjpk 	/*
906*1676Sjpk 	 * MLPs are always validated using the range and set of the local
907*1676Sjpk 	 * address, even when the remote host is unlabeled.
908*1676Sjpk 	 */
909*1676Sjpk 	if (connp->conn_mlp_type == mlptBoth ||
910*1676Sjpk 	/* LINTED: no consequent */
911*1676Sjpk 	    connp->conn_mlp_type == (shared_addr ? mlptShared : mlptPrivate)) {
912*1676Sjpk 		;
913*1676Sjpk 
914*1676Sjpk 	/*
915*1676Sjpk 	 * If this is a packet from an unlabeled sender, then we must apply
916*1676Sjpk 	 * different rules.  If the label is equal to the zone's label, then
917*1676Sjpk 	 * it's allowed.  If it's not equal, but the zone is either the global
918*1676Sjpk 	 * zone or the label is dominated by the zone's label, then allow it
919*1676Sjpk 	 * as long as it's in the range configured for the destination.
920*1676Sjpk 	 */
921*1676Sjpk 	} else if (plabel->tsl_flags & TSLF_UNLABELED) {
922*1676Sjpk 		if (plabel->tsl_doi == conn_plabel->tsl_doi &&
923*1676Sjpk 		    blequal(label, conn_label))
924*1676Sjpk 			return (B_TRUE);
925*1676Sjpk 
926*1676Sjpk 		if (!connp->conn_mac_exempt ||
927*1676Sjpk 		    (connp->conn_zoneid != GLOBAL_ZONEID &&
928*1676Sjpk 		    (plabel->tsl_doi != conn_plabel->tsl_doi ||
929*1676Sjpk 		    !bldominates(conn_label, label)))) {
930*1676Sjpk 			DTRACE_PROBE3(
931*1676Sjpk 			    tx__ip__log__drop__receivelocal__mac_unl,
932*1676Sjpk 			    char *,
933*1676Sjpk 			    "unlabeled packet mp(1) fails mac for conn(2)",
934*1676Sjpk 			    mblk_t *, mp, conn_t *, connp);
935*1676Sjpk 			return (B_FALSE);
936*1676Sjpk 		}
937*1676Sjpk 
938*1676Sjpk 	/*
939*1676Sjpk 	 * If this is a private address and the connection is SLP for private
940*1676Sjpk 	 * addresses, then the only thing that matters is the label on the
941*1676Sjpk 	 * zone, which is the same as the label on the connection.  We don't
942*1676Sjpk 	 * care (and don't have to care) about the tnrhdb.
943*1676Sjpk 	 */
944*1676Sjpk 	} else if (!shared_addr) {
945*1676Sjpk 		/*
946*1676Sjpk 		 * Since this is a zone-specific address, we know that any MLP
947*1676Sjpk 		 * case should have been handled up above.  That means this
948*1676Sjpk 		 * connection must not be MLP for zone-specific addresses.  We
949*1676Sjpk 		 * assert that to be true.
950*1676Sjpk 		 */
951*1676Sjpk 		ASSERT(connp->conn_mlp_type == mlptSingle ||
952*1676Sjpk 		    connp->conn_mlp_type == mlptShared);
953*1676Sjpk 		if (plabel->tsl_doi == conn_plabel->tsl_doi &&
954*1676Sjpk 		    blequal(label, conn_label))
955*1676Sjpk 			return (B_TRUE);
956*1676Sjpk 		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac__slp,
957*1676Sjpk 		    char *, "packet mp(1) fails exactly SLP match conn(2)",
958*1676Sjpk 		    mblk_t *, mp, conn_t *, connp);
959*1676Sjpk 		return (B_FALSE);
960*1676Sjpk 	}
961*1676Sjpk 
962*1676Sjpk 	tp = find_tpc(addr, version, B_FALSE);
963*1676Sjpk 	if (tp == NULL) {
964*1676Sjpk 		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__no__tnr,
965*1676Sjpk 		    char *, "dropping mp(1), host(2) lacks entry",
966*1676Sjpk 		    mblk_t *, mp, void *, addr);
967*1676Sjpk 		return (B_FALSE);
968*1676Sjpk 	}
969*1676Sjpk 
970*1676Sjpk 	/*
971*1676Sjpk 	 * The local host address should not be unlabeled at this point.  The
972*1676Sjpk 	 * only way this can happen is that the destination isn't unicast.  We
973*1676Sjpk 	 * assume that the packet should not have had a label, and thus should
974*1676Sjpk 	 * have been handled by the TSLF_UNLABELED logic above.
975*1676Sjpk 	 */
976*1676Sjpk 	if (tp->tpc_tp.host_type == UNLABELED) {
977*1676Sjpk 		retv = B_FALSE;
978*1676Sjpk 		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__flag, char *,
979*1676Sjpk 		    "mp(1) unlabeled source, but tp is not unlabeled.",
980*1676Sjpk 		    mblk_t *, mp, tsol_tpc_t *, tp);
981*1676Sjpk 
982*1676Sjpk 	} else if (tp->tpc_tp.host_type != SUN_CIPSO) {
983*1676Sjpk 		retv = B_FALSE;
984*1676Sjpk 		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__tptype, char *,
985*1676Sjpk 		    "delivering mp(1), found unrecognized tpc(2) type.",
986*1676Sjpk 		    mblk_t *, mp, tsol_tpc_t *, tp);
987*1676Sjpk 
988*1676Sjpk 	} else if (plabel->tsl_doi != tp->tpc_tp.tp_doi) {
989*1676Sjpk 		retv = B_FALSE;
990*1676Sjpk 		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac, char *,
991*1676Sjpk 		    "mp(1) could not be delievered to tp(2), doi mismatch",
992*1676Sjpk 		    mblk_t *, mp, tsol_tpc_t *, tp);
993*1676Sjpk 
994*1676Sjpk 	} else if (!_blinrange(label, &tp->tpc_tp.tp_sl_range_cipso) &&
995*1676Sjpk 	    !blinlset(label, tp->tpc_tp.tp_sl_set_cipso)) {
996*1676Sjpk 		retv = B_FALSE;
997*1676Sjpk 		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac, char *,
998*1676Sjpk 		    "mp(1) could not be delievered to tp(2), bad mac",
999*1676Sjpk 		    mblk_t *, mp, tsol_tpc_t *, tp);
1000*1676Sjpk 	} else {
1001*1676Sjpk 		retv = B_TRUE;
1002*1676Sjpk 	}
1003*1676Sjpk 
1004*1676Sjpk 	TPC_RELE(tp);
1005*1676Sjpk 
1006*1676Sjpk 	return (retv);
1007*1676Sjpk }
1008*1676Sjpk 
1009*1676Sjpk boolean_t
1010*1676Sjpk tsol_can_accept_raw(mblk_t *mp, boolean_t check_host)
1011*1676Sjpk {
1012*1676Sjpk 	ts_label_t	*plabel = NULL;
1013*1676Sjpk 	tsol_tpc_t	*src_rhtp, *dst_rhtp;
1014*1676Sjpk 	boolean_t	retv;
1015*1676Sjpk 
1016*1676Sjpk 	if (DB_CRED(mp) != NULL)
1017*1676Sjpk 		plabel = crgetlabel(DB_CRED(mp));
1018*1676Sjpk 
1019*1676Sjpk 	/* We are bootstrapping or the internal template was never deleted */
1020*1676Sjpk 	if (plabel == NULL)
1021*1676Sjpk 		return (B_TRUE);
1022*1676Sjpk 
1023*1676Sjpk 	if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) {
1024*1676Sjpk 		ipha_t *ipha = (ipha_t *)mp->b_rptr;
1025*1676Sjpk 
1026*1676Sjpk 		src_rhtp = find_tpc(&ipha->ipha_src, IPV4_VERSION,
1027*1676Sjpk 		    B_FALSE);
1028*1676Sjpk 		if (src_rhtp == NULL)
1029*1676Sjpk 			return (B_FALSE);
1030*1676Sjpk 		dst_rhtp = find_tpc(&ipha->ipha_dst, IPV4_VERSION,
1031*1676Sjpk 		    B_FALSE);
1032*1676Sjpk 	} else {
1033*1676Sjpk 		ip6_t *ip6h = (ip6_t *)mp->b_rptr;
1034*1676Sjpk 
1035*1676Sjpk 		src_rhtp = find_tpc(&ip6h->ip6_src, IPV6_VERSION,
1036*1676Sjpk 		    B_FALSE);
1037*1676Sjpk 		if (src_rhtp == NULL)
1038*1676Sjpk 			return (B_FALSE);
1039*1676Sjpk 		dst_rhtp = find_tpc(&ip6h->ip6_dst, IPV6_VERSION,
1040*1676Sjpk 		    B_FALSE);
1041*1676Sjpk 	}
1042*1676Sjpk 	if (dst_rhtp == NULL) {
1043*1676Sjpk 		TPC_RELE(src_rhtp);
1044*1676Sjpk 		return (B_FALSE);
1045*1676Sjpk 	}
1046*1676Sjpk 
1047*1676Sjpk 	if (label2doi(plabel) != src_rhtp->tpc_tp.tp_doi) {
1048*1676Sjpk 		retv = B_FALSE;
1049*1676Sjpk 
1050*1676Sjpk 	/*
1051*1676Sjpk 	 * Check that the packet's label is in the correct range for labeled
1052*1676Sjpk 	 * sender, or is equal to the default label for unlabeled sender.
1053*1676Sjpk 	 */
1054*1676Sjpk 	} else if ((src_rhtp->tpc_tp.host_type != UNLABELED &&
1055*1676Sjpk 	    !_blinrange(label2bslabel(plabel),
1056*1676Sjpk 	    &src_rhtp->tpc_tp.tp_sl_range_cipso) &&
1057*1676Sjpk 	    !blinlset(label2bslabel(plabel),
1058*1676Sjpk 	    src_rhtp->tpc_tp.tp_sl_set_cipso)) ||
1059*1676Sjpk 	    (src_rhtp->tpc_tp.host_type == UNLABELED &&
1060*1676Sjpk 	    !blequal(&plabel->tsl_label, &src_rhtp->tpc_tp.tp_def_label))) {
1061*1676Sjpk 		retv = B_FALSE;
1062*1676Sjpk 
1063*1676Sjpk 	} else if (check_host) {
1064*1676Sjpk 		retv = B_TRUE;
1065*1676Sjpk 
1066*1676Sjpk 	/*
1067*1676Sjpk 	 * Until we have SL range in the Zone structure, pass it
1068*1676Sjpk 	 * when our own address lookup returned an internal entry.
1069*1676Sjpk 	 */
1070*1676Sjpk 	} else switch (dst_rhtp->tpc_tp.host_type) {
1071*1676Sjpk 	case UNLABELED:
1072*1676Sjpk 		retv = B_TRUE;
1073*1676Sjpk 		break;
1074*1676Sjpk 
1075*1676Sjpk 	case SUN_CIPSO:
1076*1676Sjpk 		retv = _blinrange(label2bslabel(plabel),
1077*1676Sjpk 		    &dst_rhtp->tpc_tp.tp_sl_range_cipso) ||
1078*1676Sjpk 		    blinlset(label2bslabel(plabel),
1079*1676Sjpk 		    dst_rhtp->tpc_tp.tp_sl_set_cipso);
1080*1676Sjpk 		break;
1081*1676Sjpk 
1082*1676Sjpk 	default:
1083*1676Sjpk 		retv = B_FALSE;
1084*1676Sjpk 	}
1085*1676Sjpk 	TPC_RELE(src_rhtp);
1086*1676Sjpk 	TPC_RELE(dst_rhtp);
1087*1676Sjpk 	return (retv);
1088*1676Sjpk }
1089*1676Sjpk 
1090*1676Sjpk /*
1091*1676Sjpk  * This routine determines whether a response to a failed packet delivery or
1092*1676Sjpk  * connection should be sent back.  By default, the policy is to allow such
1093*1676Sjpk  * messages to be sent at all times, as these messages reveal little useful
1094*1676Sjpk  * information and are healthy parts of TCP/IP networking.
1095*1676Sjpk  *
1096*1676Sjpk  * If tsol_strict_error is set, then we do strict tests: if the packet label is
1097*1676Sjpk  * within the label range/set of this host/zone, return B_TRUE; otherwise
1098*1676Sjpk  * return B_FALSE, which causes the packet to be dropped silently.
1099*1676Sjpk  *
1100*1676Sjpk  * Note that tsol_get_pkt_label will cause the packet to drop if the sender is
1101*1676Sjpk  * marked as labeled in the remote host database, but the packet lacks a label.
1102*1676Sjpk  * This means that we don't need to do a lookup on the source; the
1103*1676Sjpk  * TSLF_UNLABELED flag is sufficient.
1104*1676Sjpk  */
1105*1676Sjpk boolean_t
1106*1676Sjpk tsol_can_reply_error(const mblk_t *mp)
1107*1676Sjpk {
1108*1676Sjpk 	ts_label_t	*plabel = NULL;
1109*1676Sjpk 	tsol_tpc_t	*rhtp;
1110*1676Sjpk 	const ipha_t	*ipha;
1111*1676Sjpk 	const ip6_t	*ip6h;
1112*1676Sjpk 	boolean_t	retv;
1113*1676Sjpk 	bslabel_t	*pktbs;
1114*1676Sjpk 
1115*1676Sjpk 	/* Caller must pull up at least the IP header */
1116*1676Sjpk 	ASSERT(MBLKL(mp) >= (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION ?
1117*1676Sjpk 	    sizeof (*ipha) : sizeof (*ip6h)));
1118*1676Sjpk 
1119*1676Sjpk 	if (!tsol_strict_error)
1120*1676Sjpk 		return (B_TRUE);
1121*1676Sjpk 
1122*1676Sjpk 	if (DB_CRED(mp) != NULL)
1123*1676Sjpk 		plabel = crgetlabel(DB_CRED(mp));
1124*1676Sjpk 
1125*1676Sjpk 	/* We are bootstrapping or the internal template was never deleted */
1126*1676Sjpk 	if (plabel == NULL)
1127*1676Sjpk 		return (B_TRUE);
1128*1676Sjpk 
1129*1676Sjpk 	if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) {
1130*1676Sjpk 		ipha = (const ipha_t *)mp->b_rptr;
1131*1676Sjpk 		rhtp = find_tpc(&ipha->ipha_dst, IPV4_VERSION, B_FALSE);
1132*1676Sjpk 	} else {
1133*1676Sjpk 		ip6h = (const ip6_t *)mp->b_rptr;
1134*1676Sjpk 		rhtp = find_tpc(&ip6h->ip6_dst, IPV6_VERSION, B_FALSE);
1135*1676Sjpk 	}
1136*1676Sjpk 
1137*1676Sjpk 	if (rhtp == NULL || label2doi(plabel) != rhtp->tpc_tp.tp_doi) {
1138*1676Sjpk 		retv = B_FALSE;
1139*1676Sjpk 	} else {
1140*1676Sjpk 		/*
1141*1676Sjpk 		 * If we're in the midst of forwarding, then the destination
1142*1676Sjpk 		 * address might not be labeled.  In that case, allow unlabeled
1143*1676Sjpk 		 * packets through only if the default label is the same, and
1144*1676Sjpk 		 * labeled ones if they dominate.
1145*1676Sjpk 		 */
1146*1676Sjpk 		pktbs = label2bslabel(plabel);
1147*1676Sjpk 		switch (rhtp->tpc_tp.host_type) {
1148*1676Sjpk 		case UNLABELED:
1149*1676Sjpk 			if (plabel->tsl_flags & TSLF_UNLABELED) {
1150*1676Sjpk 				retv = blequal(pktbs,
1151*1676Sjpk 				    &rhtp->tpc_tp.tp_def_label);
1152*1676Sjpk 			} else {
1153*1676Sjpk 				retv = bldominates(pktbs,
1154*1676Sjpk 				    &rhtp->tpc_tp.tp_def_label);
1155*1676Sjpk 			}
1156*1676Sjpk 			break;
1157*1676Sjpk 
1158*1676Sjpk 		case SUN_CIPSO:
1159*1676Sjpk 			retv = _blinrange(pktbs,
1160*1676Sjpk 			    &rhtp->tpc_tp.tp_sl_range_cipso) ||
1161*1676Sjpk 			    blinlset(pktbs, rhtp->tpc_tp.tp_sl_set_cipso);
1162*1676Sjpk 			break;
1163*1676Sjpk 
1164*1676Sjpk 		default:
1165*1676Sjpk 			retv = B_FALSE;
1166*1676Sjpk 			break;
1167*1676Sjpk 		}
1168*1676Sjpk 	}
1169*1676Sjpk 
1170*1676Sjpk 	if (rhtp != NULL)
1171*1676Sjpk 		TPC_RELE(rhtp);
1172*1676Sjpk 
1173*1676Sjpk 	return (retv);
1174*1676Sjpk }
1175*1676Sjpk 
1176*1676Sjpk /*
1177*1676Sjpk  * Finds the zone associated with the given packet.  Returns GLOBAL_ZONEID if
1178*1676Sjpk  * the zone cannot be located.
1179*1676Sjpk  *
1180*1676Sjpk  * This is used by the classifier when the packet matches an ALL_ZONES IRE, and
1181*1676Sjpk  * there's no MLP defined.
1182*1676Sjpk  */
1183*1676Sjpk zoneid_t
1184*1676Sjpk tsol_packet_to_zoneid(const mblk_t *mp)
1185*1676Sjpk {
1186*1676Sjpk 	cred_t *cr = DB_CRED(mp);
1187*1676Sjpk 	zone_t *zone;
1188*1676Sjpk 	ts_label_t *label;
1189*1676Sjpk 
1190*1676Sjpk 	if (cr != NULL) {
1191*1676Sjpk 		if ((label = crgetlabel(cr)) != NULL) {
1192*1676Sjpk 			zone = zone_find_by_label(label);
1193*1676Sjpk 			if (zone != NULL) {
1194*1676Sjpk 				zoneid_t zoneid = zone->zone_id;
1195*1676Sjpk 
1196*1676Sjpk 				zone_rele(zone);
1197*1676Sjpk 				return (zoneid);
1198*1676Sjpk 			}
1199*1676Sjpk 		}
1200*1676Sjpk 	}
1201*1676Sjpk 	return (GLOBAL_ZONEID);
1202*1676Sjpk }
1203*1676Sjpk 
/*
 * Check whether the label tsl is acceptable to the gateway security
 * attributes attached to the given IRE.
 *
 * Returns 0 when the IRE passes.  That includes the cases where the system
 * is not labeled, the IRE is local/loopback/broadcast/interface, tsl is
 * NULL, or no gateway template was found and no gateway address could be
 * determined.  Returns EACCES when the IRE has no gateway security
 * attributes or the label fails the DOI/range/set comparisons, and EINVAL
 * when a gateway address is known but no remote-host entry was found for it.
 *
 * Side effect: while igsa_lock is held, a stale cached attrp->igsa_rhc is
 * released and cleared, and a newly looked-up entry is stored in its place.
 *
 * Locking: igsa_lock is held across the gcgrp/rhc setup.  The gcgrp reader
 * lock and the gcgrp reference taken during setup are kept until the "done"
 * label, as is the extra hold on gw_rhc.
 */
1204*1676Sjpk int
1205*1676Sjpk tsol_ire_match_gwattr(ire_t *ire, const ts_label_t *tsl)
1206*1676Sjpk {
1207*1676Sjpk 	int		error = 0;
1208*1676Sjpk 	tsol_ire_gw_secattr_t *attrp = NULL;
1209*1676Sjpk 	tsol_tnrhc_t	*gw_rhc = NULL;
1210*1676Sjpk 	tsol_gcgrp_t	*gcgrp = NULL;
1211*1676Sjpk 	tsol_gc_t	*gc = NULL;
1212*1676Sjpk 	in_addr_t	ga_addr4;
1213*1676Sjpk 	void		*paddr = NULL;
1214*1676Sjpk 
1215*1676Sjpk 	/* Not in Trusted mode or IRE is local/loopback/broadcast/interface */
1216*1676Sjpk 	if (!is_system_labeled() ||
1217*1676Sjpk 	    (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST |
1218*1676Sjpk 	    IRE_INTERFACE)))
1219*1676Sjpk 		goto done;
1220*1676Sjpk 
1221*1676Sjpk 	/*
1222*1676Sjpk 	 * If we don't have a label to compare with, or the IRE does not
1223*1676Sjpk 	 * contain any gateway security attributes, there's not much that
1224*1676Sjpk 	 * we can do.  We let the former case pass, and the latter fail,
1225*1676Sjpk 	 * since the IRE doesn't qualify for a match due to the lack of
1226*1676Sjpk 	 * security attributes.
1227*1676Sjpk 	 */
1228*1676Sjpk 	if (tsl == NULL || ire->ire_gw_secattr == NULL) {
1229*1676Sjpk 		if (tsl != NULL) {
1230*1676Sjpk 			DTRACE_PROBE3(
1231*1676Sjpk 			tx__ip__log__drop__irematch__nogwsec, char *,
1232*1676Sjpk 			"ire(1) lacks ire_gw_secattr when matching label(2)",
1233*1676Sjpk 			ire_t *, ire, ts_label_t *, tsl);
1234*1676Sjpk 			error = EACCES;
1235*1676Sjpk 		}
1236*1676Sjpk 		goto done;
1237*1676Sjpk 	}
1238*1676Sjpk 
1239*1676Sjpk 	attrp = ire->ire_gw_secattr;
1240*1676Sjpk 
1241*1676Sjpk 	/*
1242*1676Sjpk 	 * The possible lock order scenarios related to the tsol gateway
1243*1676Sjpk 	 * attribute locks are documented at the beginning of ip.c in the
1244*1676Sjpk 	 * lock order scenario section.
1245*1676Sjpk 	 */
1246*1676Sjpk 	mutex_enter(&attrp->igsa_lock);
1247*1676Sjpk 
1248*1676Sjpk 	/*
1249*1676Sjpk 	 * Depending on the IRE type (prefix vs. cache), we seek the group
1250*1676Sjpk 	 * structure which contains all security credentials of the gateway.
1251*1676Sjpk 	 * A prefix IRE is associated with at most one gateway credential,
1252*1676Sjpk 	 * while a cache IRE is associated with every credential that the
1253*1676Sjpk 	 * gateway has.
1254*1676Sjpk 	 */
1255*1676Sjpk 	if ((gc = attrp->igsa_gc) != NULL) {			/* prefix */
1256*1676Sjpk 		gcgrp = gc->gc_grp;
1257*1676Sjpk 		ASSERT(gcgrp != NULL);
1258*1676Sjpk 		rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
1259*1676Sjpk 	} else if ((gcgrp = attrp->igsa_gcgrp) != NULL) {	/* cache */
1260*1676Sjpk 		rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
1261*1676Sjpk 		gc = gcgrp->gcgrp_head;
1262*1676Sjpk 		if (gc == NULL) {
1263*1676Sjpk 			/* gc group is empty, so drop the lock now */
1264*1676Sjpk 			ASSERT(gcgrp->gcgrp_count == 0);
1265*1676Sjpk 			rw_exit(&gcgrp->gcgrp_rwlock);
1266*1676Sjpk 			gcgrp = NULL;
1267*1676Sjpk 		}
1268*1676Sjpk 	}
1269*1676Sjpk 
	/*
	 * From here on, a non-NULL gcgrp means its reader lock is held; take
	 * a reference as well.  Both are dropped at "done".
	 */
1270*1676Sjpk 	if (gcgrp != NULL)
1271*1676Sjpk 		GCGRP_REFHOLD(gcgrp);
1272*1676Sjpk 
1273*1676Sjpk 	if ((gw_rhc = attrp->igsa_rhc) != NULL) {
1274*1676Sjpk 		/*
1275*1676Sjpk 		 * If our cached entry has grown stale, then discard it so we
1276*1676Sjpk 		 * can get a new one.
1277*1676Sjpk 		 */
1278*1676Sjpk 		if (gw_rhc->rhc_invalid || gw_rhc->rhc_tpc->tpc_invalid) {
1279*1676Sjpk 			TNRHC_RELE(gw_rhc);
1280*1676Sjpk 			attrp->igsa_rhc = gw_rhc = NULL;
1281*1676Sjpk 		} else {
			/* Extra hold for our own use; released at "done". */
1282*1676Sjpk 			TNRHC_HOLD(gw_rhc)
1283*1676Sjpk 		}
1284*1676Sjpk 	}
1285*1676Sjpk 
1286*1676Sjpk 	/* Last attempt at loading the template had failed; try again */
1287*1676Sjpk 	if (gw_rhc == NULL) {
		/*
		 * Choose the gateway address to look up: the gcgrp address
		 * if we have a group, otherwise the IRE's own gateway
		 * address when it is not the unspecified address.
		 */
1288*1676Sjpk 		if (gcgrp != NULL) {
1289*1676Sjpk 			tsol_gcgrp_addr_t *ga = &gcgrp->gcgrp_addr;
1290*1676Sjpk 
1291*1676Sjpk 			if (ire->ire_ipversion == IPV4_VERSION) {
1292*1676Sjpk 				ASSERT(ga->ga_af == AF_INET);
1293*1676Sjpk 				IN6_V4MAPPED_TO_IPADDR(&ga->ga_addr, ga_addr4);
1294*1676Sjpk 				paddr = &ga_addr4;
1295*1676Sjpk 			} else {
1296*1676Sjpk 				ASSERT(ga->ga_af == AF_INET6);
1297*1676Sjpk 				paddr = &ga->ga_addr;
1298*1676Sjpk 			}
1299*1676Sjpk 		} else if (ire->ire_ipversion == IPV6_VERSION &&
1300*1676Sjpk 		    !IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6)) {
1301*1676Sjpk 			paddr = &ire->ire_gateway_addr_v6;
1302*1676Sjpk 		} else if (ire->ire_ipversion == IPV4_VERSION &&
1303*1676Sjpk 		    ire->ire_gateway_addr != INADDR_ANY) {
1304*1676Sjpk 			paddr = &ire->ire_gateway_addr;
1305*1676Sjpk 		}
1306*1676Sjpk 
1307*1676Sjpk 		/* We've found a gateway address to do the template lookup */
1308*1676Sjpk 		if (paddr != NULL) {
1309*1676Sjpk 			ASSERT(gw_rhc == NULL);
1310*1676Sjpk 			if (ire->ire_ipversion == IPV4_VERSION)
1311*1676Sjpk 				gw_rhc = find_rhc_v4(paddr);
1312*1676Sjpk 			else
1313*1676Sjpk 				gw_rhc = find_rhc_v6(paddr);
1314*1676Sjpk 			if (gw_rhc != NULL) {
1315*1676Sjpk 				/*
1316*1676Sjpk 				 * Note that if the lookup above returned an
1317*1676Sjpk 				 * internal template, we'll use it for the
1318*1676Sjpk 				 * time being, and do another lookup next
1319*1676Sjpk 				 * time around.
1320*1676Sjpk 				 */
1321*1676Sjpk 				/* Another thread has loaded the template? */
				/*
				 * NOTE(review): igsa_lock appears to have
				 * been held continuously since igsa_rhc was
				 * last seen (or set to) NULL above, so this
				 * branch looks unreachable -- confirm.
				 */
1322*1676Sjpk 				if (attrp->igsa_rhc != NULL) {
1323*1676Sjpk 					TNRHC_RELE(gw_rhc)
1324*1676Sjpk 					/* reload, it could be different */
1325*1676Sjpk 					gw_rhc = attrp->igsa_rhc;
1326*1676Sjpk 				} else {
1327*1676Sjpk 					attrp->igsa_rhc = gw_rhc;
1328*1676Sjpk 				}
1329*1676Sjpk 				/*
1330*1676Sjpk 				 * Hold an extra reference just like we did
1331*1676Sjpk 				 * above prior to dropping the igsa_lock.
1332*1676Sjpk 				 */
1333*1676Sjpk 				TNRHC_HOLD(gw_rhc)
1334*1676Sjpk 			}
1335*1676Sjpk 		}
1336*1676Sjpk 	}
1337*1676Sjpk 
1338*1676Sjpk 	mutex_exit(&attrp->igsa_lock);
1339*1676Sjpk 	/* Gateway template not found */
1340*1676Sjpk 	if (gw_rhc == NULL) {
1341*1676Sjpk 		/*
1342*1676Sjpk 		 * If destination address is directly reachable through an
1343*1676Sjpk 		 * interface rather than through a learned route, pass it.
		 * Here paddr == NULL means no gateway address was found, so
		 * error stays 0; a non-NULL paddr with no template fails.
1344*1676Sjpk 		 */
1345*1676Sjpk 		if (paddr != NULL) {
1346*1676Sjpk 			DTRACE_PROBE3(
1347*1676Sjpk 			    tx__ip__log__drop__irematch__nogwtmpl, char *,
1348*1676Sjpk 			    "ire(1), label(2) off-link with no gw_rhc",
1349*1676Sjpk 			    ire_t *, ire, ts_label_t *, tsl);
1350*1676Sjpk 			error = EINVAL;
1351*1676Sjpk 		}
1352*1676Sjpk 		goto done;
1353*1676Sjpk 	}
1354*1676Sjpk 
1355*1676Sjpk 	if (gc != NULL) {
1356*1676Sjpk 		tsol_gcdb_t *gcdb;
1357*1676Sjpk 		/*
1358*1676Sjpk 		 * In the case of IRE_CACHE we've got one or more gateway
1359*1676Sjpk 		 * security credentials to compare against the passed in label.
1360*1676Sjpk 		 * Perform label range comparison against each security
1361*1676Sjpk 		 * credential of the gateway. In the case of a prefix ire
1362*1676Sjpk 		 * we need to match against the security attributes of
1363*1676Sjpk 		 * just the route itself, so the loop is executed only once.
1364*1676Sjpk 		 */
1365*1676Sjpk 		ASSERT(gcgrp != NULL);
1366*1676Sjpk 		do {
1367*1676Sjpk 			gcdb = gc->gc_db;
			/* A match leaves gc non-NULL when the loop exits. */
1368*1676Sjpk 			if (tsl->tsl_doi == gcdb->gcdb_doi &&
1369*1676Sjpk 			    _blinrange(&tsl->tsl_label, &gcdb->gcdb_slrange))
1370*1676Sjpk 				break;
1371*1676Sjpk 			if (ire->ire_type == IRE_CACHE)
1372*1676Sjpk 				gc = gc->gc_next;
1373*1676Sjpk 			else
1374*1676Sjpk 				gc = NULL;
1375*1676Sjpk 		} while (gc != NULL);
1376*1676Sjpk 
1377*1676Sjpk 		if (gc == NULL) {
1378*1676Sjpk 			DTRACE_PROBE3(
1379*1676Sjpk 			    tx__ip__log__drop__irematch__nogcmatched,
1380*1676Sjpk 			    char *, "ire(1), tsl(2): all gc failed match",
1381*1676Sjpk 			    ire_t *, ire, ts_label_t *, tsl);
1382*1676Sjpk 			error = EACCES;
1383*1676Sjpk 		}
1384*1676Sjpk 	} else {
1385*1676Sjpk 		/*
1386*1676Sjpk 		 * We didn't find any gateway credentials in the IRE
1387*1676Sjpk 		 * attributes; fall back to the gateway's template for
1388*1676Sjpk 		 * label range checks, if we are required to do so.
		 * There is no default case: any host type other than
		 * SUN_CIPSO or UNLABELED leaves error at 0.
1389*1676Sjpk 		 */
1390*1676Sjpk 		ASSERT(gw_rhc != NULL);
1391*1676Sjpk 		switch (gw_rhc->rhc_tpc->tpc_tp.host_type) {
1392*1676Sjpk 		case SUN_CIPSO:
1393*1676Sjpk 			if (tsl->tsl_doi !=
1394*1676Sjpk 			    gw_rhc->rhc_tpc->tpc_tp.tp_doi ||
1395*1676Sjpk 			    (!_blinrange(&tsl->tsl_label,
1396*1676Sjpk 			    &gw_rhc->rhc_tpc->tpc_tp.
1397*1676Sjpk 			    tp_sl_range_cipso) &&
1398*1676Sjpk 			    !blinlset(&tsl->tsl_label,
1399*1676Sjpk 			    gw_rhc->rhc_tpc->tpc_tp.tp_sl_set_cipso))) {
1400*1676Sjpk 				error = EACCES;
1401*1676Sjpk 				DTRACE_PROBE4(
1402*1676Sjpk 				    tx__ip__log__drop__irematch__deftmpl,
1403*1676Sjpk 				    char *, "ire(1), tsl(2), gw_rhc(3) "
1404*1676Sjpk 				    "failed match (cipso gw)",
1405*1676Sjpk 				    ire_t *, ire, ts_label_t *, tsl,
1406*1676Sjpk 				    tsol_tnrhc_t *, gw_rhc);
1407*1676Sjpk 			}
1408*1676Sjpk 			break;
1409*1676Sjpk 
1410*1676Sjpk 		case UNLABELED:
			/* Uses the tp_gw_sl_range/tp_gw_sl_set fields. */
1411*1676Sjpk 			if (tsl->tsl_doi !=
1412*1676Sjpk 				gw_rhc->rhc_tpc->tpc_tp.tp_doi ||
1413*1676Sjpk 			    (!_blinrange(&tsl->tsl_label,
1414*1676Sjpk 			    &gw_rhc->rhc_tpc->tpc_tp.tp_gw_sl_range) &&
1415*1676Sjpk 			    !blinlset(&tsl->tsl_label,
1416*1676Sjpk 			    gw_rhc->rhc_tpc->tpc_tp.tp_gw_sl_set))) {
1417*1676Sjpk 				error = EACCES;
1418*1676Sjpk 				DTRACE_PROBE4(
1419*1676Sjpk 				    tx__ip__log__drop__irematch__deftmpl,
1420*1676Sjpk 				    char *, "ire(1), tsl(2), gw_rhc(3) "
1421*1676Sjpk 				    "failed match (unlabeled gw)",
1422*1676Sjpk 				    ire_t *, ire, ts_label_t *, tsl,
1423*1676Sjpk 				    tsol_tnrhc_t *, gw_rhc);
1424*1676Sjpk 			}
1425*1676Sjpk 			break;
1426*1676Sjpk 		}
1427*1676Sjpk 	}
1428*1676Sjpk 
1429*1676Sjpk done:
1430*1676Sjpk 
	/* Drop the gcgrp reader lock and reference taken above, if any. */
1431*1676Sjpk 	if (gcgrp != NULL) {
1432*1676Sjpk 		rw_exit(&gcgrp->gcgrp_rwlock);
1433*1676Sjpk 		GCGRP_REFRELE(gcgrp);
1434*1676Sjpk 	}
1435*1676Sjpk 
	/* Drop the extra hold taken on the gateway entry, if any. */
1436*1676Sjpk 	if (gw_rhc != NULL)
1437*1676Sjpk 		TNRHC_RELE(gw_rhc)
1438*1676Sjpk 
1439*1676Sjpk 	return (error);
1440*1676Sjpk }
1441*1676Sjpk 
1442*1676Sjpk /*
1443*1676Sjpk  * Performs label accreditation checks for packet forwarding.
1444*1676Sjpk  *
1445*1676Sjpk  * Returns a pointer to the modified mblk if allowed for forwarding,
1446*1676Sjpk  * or NULL if the packet must be dropped.
1447*1676Sjpk  */
1448*1676Sjpk mblk_t *
1449*1676Sjpk tsol_ip_forward(ire_t *ire, mblk_t *mp)
1450*1676Sjpk {
1451*1676Sjpk 	tsol_ire_gw_secattr_t *attrp = NULL;
1452*1676Sjpk 	ipha_t		*ipha;
1453*1676Sjpk 	ip6_t		*ip6h;
1454*1676Sjpk 	const void	*pdst;
1455*1676Sjpk 	const void	*psrc;
1456*1676Sjpk 	boolean_t	off_link;
1457*1676Sjpk 	tsol_tpc_t	*dst_rhtp, *gw_rhtp;
1458*1676Sjpk 	tsol_ip_label_t label_type;
1459*1676Sjpk 	uchar_t		*opt_ptr = NULL;
1460*1676Sjpk 	ts_label_t	*tsl;
1461*1676Sjpk 	uint8_t		proto;
1462*1676Sjpk 	int		af, adjust;
1463*1676Sjpk 	uint16_t	iplen;
1464*1676Sjpk 
1465*1676Sjpk 	ASSERT(ire != NULL && mp != NULL);
1466*1676Sjpk 	ASSERT(ire->ire_stq != NULL);
1467*1676Sjpk 
1468*1676Sjpk 	af = (ire->ire_ipversion == IPV4_VERSION) ? AF_INET : AF_INET6;
1469*1676Sjpk 
1470*1676Sjpk 	if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) {
1471*1676Sjpk 		ASSERT(ire->ire_ipversion == IPV4_VERSION);
1472*1676Sjpk 		ipha = (ipha_t *)mp->b_rptr;
1473*1676Sjpk 		psrc = &ipha->ipha_src;
1474*1676Sjpk 		pdst = &ipha->ipha_dst;
1475*1676Sjpk 		proto = ipha->ipha_protocol;
1476*1676Sjpk 
1477*1676Sjpk 		/* destination not directly reachable? */
1478*1676Sjpk 		off_link = (ire->ire_gateway_addr != INADDR_ANY);
1479*1676Sjpk 	} else {
1480*1676Sjpk 		ASSERT(ire->ire_ipversion == IPV6_VERSION);
1481*1676Sjpk 		ip6h = (ip6_t *)mp->b_rptr;
1482*1676Sjpk 		psrc = &ip6h->ip6_src;
1483*1676Sjpk 		pdst = &ip6h->ip6_dst;
1484*1676Sjpk 		proto = ip6h->ip6_nxt;
1485*1676Sjpk 
1486*1676Sjpk 		if (proto != IPPROTO_TCP && proto != IPPROTO_UDP &&
1487*1676Sjpk 		    proto != IPPROTO_ICMPV6) {
1488*1676Sjpk 			uint8_t *nexthdrp;
1489*1676Sjpk 			uint16_t hdr_len;
1490*1676Sjpk 
1491*1676Sjpk 			if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len,
1492*1676Sjpk 			    &nexthdrp)) {
1493*1676Sjpk 				/* malformed packet; drop it */
1494*1676Sjpk 				return (NULL);
1495*1676Sjpk 			}
1496*1676Sjpk 			proto = *nexthdrp;
1497*1676Sjpk 		}
1498*1676Sjpk 
1499*1676Sjpk 		/* destination not directly reachable? */
1500*1676Sjpk 		off_link = !IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6);
1501*1676Sjpk 	}
1502*1676Sjpk 
1503*1676Sjpk 	if ((tsl = MBLK_GETLABEL(mp)) == NULL)
1504*1676Sjpk 		return (mp);
1505*1676Sjpk 
1506*1676Sjpk 	label_type = tsol_get_option(mp, &opt_ptr);
1507*1676Sjpk 
1508*1676Sjpk 	ASSERT(psrc != NULL && pdst != NULL);
1509*1676Sjpk 	dst_rhtp = find_tpc(pdst, ire->ire_ipversion, B_FALSE);
1510*1676Sjpk 
1511*1676Sjpk 	if (dst_rhtp == NULL) {
1512*1676Sjpk 		/*
1513*1676Sjpk 		 * Without a template we do not know if forwarding
1514*1676Sjpk 		 * violates MAC
1515*1676Sjpk 		 */
1516*1676Sjpk 		DTRACE_PROBE3(tx__ip__log__drop__forward__nodst, char *,
1517*1676Sjpk 		    "mp(1) dropped, no template for destination ip4|6(2)",
1518*1676Sjpk 		    mblk_t *, mp, void *, pdst);
1519*1676Sjpk 		return (NULL);
1520*1676Sjpk 	}
1521*1676Sjpk 
1522*1676Sjpk 	/*
1523*1676Sjpk 	 * Gateway template must have existed for off-link destinations,
1524*1676Sjpk 	 * since tsol_ire_match_gwattr has ensured such condition.
1525*1676Sjpk 	 */
1526*1676Sjpk 	if (((attrp = ire->ire_gw_secattr) == NULL || attrp->igsa_rhc == NULL ||
1527*1676Sjpk 	    (gw_rhtp = attrp->igsa_rhc->rhc_tpc) == NULL) && off_link) {
1528*1676Sjpk 		DTRACE_PROBE3(tx__ip__log__drop__forward__nogw, char *,
1529*1676Sjpk 		    "mp(1) dropped, no gateway in ire attributes(2)",
1530*1676Sjpk 		    mblk_t *, mp, tsol_ire_gw_secattr_t *, attrp);
1531*1676Sjpk 		mp = NULL;
1532*1676Sjpk 		goto keep_label;
1533*1676Sjpk 	}
1534*1676Sjpk 
1535*1676Sjpk 	/*
1536*1676Sjpk 	 * Check that the label for the packet is acceptable
1537*1676Sjpk 	 * by destination host; otherwise, drop it.
1538*1676Sjpk 	 */
1539*1676Sjpk 	switch (dst_rhtp->tpc_tp.host_type) {
1540*1676Sjpk 	case SUN_CIPSO:
1541*1676Sjpk 		if (tsl->tsl_doi != dst_rhtp->tpc_tp.tp_doi ||
1542*1676Sjpk 		    (!_blinrange(&tsl->tsl_label,
1543*1676Sjpk 		    &dst_rhtp->tpc_tp.tp_sl_range_cipso) &&
1544*1676Sjpk 		    !blinlset(&tsl->tsl_label,
1545*1676Sjpk 		    dst_rhtp->tpc_tp.tp_sl_set_cipso))) {
1546*1676Sjpk 			DTRACE_PROBE4(tx__ip__log__drop__forward__mac, char *,
1547*1676Sjpk 			    "labeled packet mp(1) dropped, label(2) fails "
1548*1676Sjpk 			    "destination(3) accredation check",
1549*1676Sjpk 			    mblk_t *, mp, ts_label_t *, tsl,
1550*1676Sjpk 			    tsol_tpc_t *, dst_rhtp);
1551*1676Sjpk 			mp = NULL;
1552*1676Sjpk 			goto keep_label;
1553*1676Sjpk 		}
1554*1676Sjpk 		break;
1555*1676Sjpk 
1556*1676Sjpk 
1557*1676Sjpk 	case UNLABELED:
1558*1676Sjpk 		if (tsl->tsl_doi != dst_rhtp->tpc_tp.tp_doi ||
1559*1676Sjpk 		    !blequal(&dst_rhtp->tpc_tp.tp_def_label,
1560*1676Sjpk 		    &tsl->tsl_label)) {
1561*1676Sjpk 			DTRACE_PROBE4(tx__ip__log__drop__forward__mac, char *,
1562*1676Sjpk 			    "unlabeled packet mp(1) dropped, label(2) fails "
1563*1676Sjpk 			    "destination(3) accredation check",
1564*1676Sjpk 			    mblk_t *, mp, ts_label_t *, tsl,
1565*1676Sjpk 			    tsol_tpc_t *, dst_rhtp);
1566*1676Sjpk 			mp = NULL;
1567*1676Sjpk 			goto keep_label;
1568*1676Sjpk 		}
1569*1676Sjpk 		break;
1570*1676Sjpk 	}
1571*1676Sjpk 	if (label_type == OPT_CIPSO) {
1572*1676Sjpk 		/*
1573*1676Sjpk 		 * We keep the label on any of the following cases:
1574*1676Sjpk 		 *
1575*1676Sjpk 		 *   1. The destination is labeled (on/off-link).
1576*1676Sjpk 		 *   2. The unlabeled destination is off-link,
1577*1676Sjpk 		 *	and the next hop gateway is labeled.
1578*1676Sjpk 		 */
1579*1676Sjpk 		if (dst_rhtp->tpc_tp.host_type != UNLABELED ||
1580*1676Sjpk 		    (off_link &&
1581*1676Sjpk 		    gw_rhtp->tpc_tp.host_type != UNLABELED))
1582*1676Sjpk 			goto keep_label;
1583*1676Sjpk 
1584*1676Sjpk 		/*
1585*1676Sjpk 		 * Strip off the CIPSO option from the packet because: the
1586*1676Sjpk 		 * unlabeled destination host is directly reachable through
1587*1676Sjpk 		 * an interface (on-link); or, the unlabeled destination host
1588*1676Sjpk 		 * is not directly reachable (off-link), and the next hop
1589*1676Sjpk 		 * gateway is unlabeled.
1590*1676Sjpk 		 */
1591*1676Sjpk 		adjust = (af == AF_INET) ? tsol_remove_secopt(ipha, MBLKL(mp)) :
1592*1676Sjpk 		    tsol_remove_secopt_v6(ip6h, MBLKL(mp));
1593*1676Sjpk 
1594*1676Sjpk 		ASSERT(adjust <= 0);
1595*1676Sjpk 		if (adjust != 0) {
1596*1676Sjpk 
1597*1676Sjpk 			/* adjust is negative */
1598*1676Sjpk 			ASSERT((mp->b_wptr + adjust) >= mp->b_rptr);
1599*1676Sjpk 			mp->b_wptr += adjust;
1600*1676Sjpk 
1601*1676Sjpk 			if (af == AF_INET) {
1602*1676Sjpk 				ipha = (ipha_t *)mp->b_rptr;
1603*1676Sjpk 				iplen = ntohs(ipha->ipha_length) + adjust;
1604*1676Sjpk 				ipha->ipha_length = htons(iplen);
1605*1676Sjpk 				ipha->ipha_hdr_checksum = 0;
1606*1676Sjpk 				ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
1607*1676Sjpk 			}
1608*1676Sjpk 			DTRACE_PROBE3(tx__ip__log__info__forward__adjust,
1609*1676Sjpk 			    char *,
1610*1676Sjpk 			    "mp(1) adjusted(2) for CIPSO option removal",
1611*1676Sjpk 			    mblk_t *, mp, int, adjust);
1612*1676Sjpk 		}
1613*1676Sjpk 		goto keep_label;
1614*1676Sjpk 	}
1615*1676Sjpk 
1616*1676Sjpk 	ASSERT(label_type == OPT_NONE);
1617*1676Sjpk 	ASSERT(dst_rhtp != NULL);
1618*1676Sjpk 
1619*1676Sjpk 	/*
1620*1676Sjpk 	 * We need to add CIPSO option if the destination or the next hop
1621*1676Sjpk 	 * gateway is labeled.  Otherwise, pass the packet as is.
1622*1676Sjpk 	 */
1623*1676Sjpk 	if (dst_rhtp->tpc_tp.host_type == UNLABELED &&
1624*1676Sjpk 	    (!off_link || gw_rhtp->tpc_tp.host_type == UNLABELED))
1625*1676Sjpk 		goto keep_label;
1626*1676Sjpk 
1627*1676Sjpk 	if ((af == AF_INET &&
1628*1676Sjpk 	    tsol_check_label(DB_CRED(mp), &mp, &adjust, B_FALSE) != 0) ||
1629*1676Sjpk 	    (af == AF_INET6 &&
1630*1676Sjpk 	    tsol_check_label_v6(DB_CRED(mp), &mp, &adjust, B_FALSE) != 0)) {
1631*1676Sjpk 		mp = NULL;
1632*1676Sjpk 		goto keep_label;
1633*1676Sjpk 	}
1634*1676Sjpk 
1635*1676Sjpk 	ASSERT(adjust != -1);
1636*1676Sjpk 	if (adjust != 0) {
1637*1676Sjpk 		if (af == AF_INET) {
1638*1676Sjpk 			ipha = (ipha_t *)mp->b_rptr;
1639*1676Sjpk 			iplen = ntohs(ipha->ipha_length) + adjust;
1640*1676Sjpk 			ipha->ipha_length = htons(iplen);
1641*1676Sjpk 			ipha->ipha_hdr_checksum = 0;
1642*1676Sjpk 			ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
1643*1676Sjpk 		}
1644*1676Sjpk 
1645*1676Sjpk 		DTRACE_PROBE3(tx__ip__log__info__forward__adjust, char *,
1646*1676Sjpk 		    "mp(1) adjusted(2) for CIPSO option removal",
1647*1676Sjpk 		    mblk_t *, mp, int, adjust);
1648*1676Sjpk 	}
1649*1676Sjpk 
1650*1676Sjpk keep_label:
1651*1676Sjpk 	TPC_RELE(dst_rhtp);
1652*1676Sjpk 	return (mp);
1653*1676Sjpk }
1654*1676Sjpk 
1655*1676Sjpk /*
1656*1676Sjpk  * Name:	tsol_rtsa_init()
1657*1676Sjpk  *
1658*1676Sjpk  * Normal:	Sanity checks on the route security attributes provided by
1659*1676Sjpk  *		user.  Convert it into a route security parameter list to
1660*1676Sjpk  *		be returned to caller.
1661*1676Sjpk  *
1662*1676Sjpk  * Output:	EINVAL if bad security attributes in the routing message
1663*1676Sjpk  *		ENOMEM if unable to allocate data structures
1664*1676Sjpk  *		0 otherwise.
1665*1676Sjpk  *
1666*1676Sjpk  * Note:	On input, cp must point to the end of any addresses in
1667*1676Sjpk  *		the rt_msghdr_t structure.
1668*1676Sjpk  */
1669*1676Sjpk int
1670*1676Sjpk tsol_rtsa_init(rt_msghdr_t *rtm, tsol_rtsecattr_t *sp, caddr_t cp)
1671*1676Sjpk {
1672*1676Sjpk 	uint_t	sacnt;
1673*1676Sjpk 	int	err;
1674*1676Sjpk 	caddr_t	lim;
1675*1676Sjpk 	tsol_rtsecattr_t *tp;
1676*1676Sjpk 
1677*1676Sjpk 	ASSERT((cp >= (caddr_t)&rtm[1]) && sp != NULL);
1678*1676Sjpk 
1679*1676Sjpk 	/*
1680*1676Sjpk 	 * In theory, we could accept as many security attributes configured
1681*1676Sjpk 	 * per route destination.  However, the current design is limited
1682*1676Sjpk 	 * such that at most only one set security attributes is allowed to
1683*1676Sjpk 	 * be associated with a prefix IRE.  We therefore assert for now.
1684*1676Sjpk 	 */
1685*1676Sjpk 	/* LINTED */
1686*1676Sjpk 	ASSERT(TSOL_RTSA_REQUEST_MAX == 1);
1687*1676Sjpk 
1688*1676Sjpk 	sp->rtsa_cnt = 0;
1689*1676Sjpk 	lim = (caddr_t)rtm + rtm->rtm_msglen;
1690*1676Sjpk 	ASSERT(cp <= lim);
1691*1676Sjpk 
1692*1676Sjpk 	if ((lim - cp) < sizeof (rtm_ext_t) ||
1693*1676Sjpk 	    ((rtm_ext_t *)cp)->rtmex_type != RTMEX_GATEWAY_SECATTR)
1694*1676Sjpk 		return (0);
1695*1676Sjpk 
1696*1676Sjpk 	if (((rtm_ext_t *)cp)->rtmex_len < sizeof (tsol_rtsecattr_t))
1697*1676Sjpk 		return (EINVAL);
1698*1676Sjpk 
1699*1676Sjpk 	cp += sizeof (rtm_ext_t);
1700*1676Sjpk 
1701*1676Sjpk 	if ((lim - cp) < sizeof (*tp) ||
1702*1676Sjpk 	    (tp = (tsol_rtsecattr_t *)cp, (sacnt = tp->rtsa_cnt) == 0) ||
1703*1676Sjpk 	    (lim - cp) < TSOL_RTSECATTR_SIZE(sacnt))
1704*1676Sjpk 		return (EINVAL);
1705*1676Sjpk 
1706*1676Sjpk 	/*
1707*1676Sjpk 	 * Trying to add route security attributes when system
1708*1676Sjpk 	 * labeling service is not available, or when user supllies
1709*1676Sjpk 	 * more than the maximum number of security attributes
1710*1676Sjpk 	 * allowed per request.
1711*1676Sjpk 	 */
1712*1676Sjpk 	if ((sacnt > 0 && !is_system_labeled()) ||
1713*1676Sjpk 	    sacnt > TSOL_RTSA_REQUEST_MAX)
1714*1676Sjpk 		return (EINVAL);
1715*1676Sjpk 
1716*1676Sjpk 	/* Ensure valid credentials */
1717*1676Sjpk 	if ((err = rtsa_validate(&((tsol_rtsecattr_t *)cp)->
1718*1676Sjpk 	    rtsa_attr[0])) != 0) {
1719*1676Sjpk 		cp += sizeof (*sp);
1720*1676Sjpk 		return (err);
1721*1676Sjpk 	}
1722*1676Sjpk 
1723*1676Sjpk 	bcopy(cp, sp, sizeof (*sp));
1724*1676Sjpk 	cp += sizeof (*sp);
1725*1676Sjpk 	return (0);
1726*1676Sjpk }
1727*1676Sjpk 
1728*1676Sjpk int
1729*1676Sjpk tsol_ire_init_gwattr(ire_t *ire, uchar_t ipversion, tsol_gc_t *gc,
1730*1676Sjpk     tsol_gcgrp_t *gcgrp)
1731*1676Sjpk {
1732*1676Sjpk 	tsol_ire_gw_secattr_t *attrp;
1733*1676Sjpk 	boolean_t exists = B_FALSE;
1734*1676Sjpk 	in_addr_t ga_addr4;
1735*1676Sjpk 	void *paddr = NULL;
1736*1676Sjpk 
1737*1676Sjpk 	ASSERT(ire != NULL);
1738*1676Sjpk 
1739*1676Sjpk 	/*
1740*1676Sjpk 	 * The only time that attrp can be NULL is when this routine is
1741*1676Sjpk 	 * called for the first time during the creation/initialization
1742*1676Sjpk 	 * of the corresponding IRE.  It will only get cleared when the
1743*1676Sjpk 	 * IRE is deleted.
1744*1676Sjpk 	 */
1745*1676Sjpk 	if ((attrp = ire->ire_gw_secattr) == NULL) {
1746*1676Sjpk 		attrp = ire_gw_secattr_alloc(KM_NOSLEEP);
1747*1676Sjpk 		if (attrp == NULL)
1748*1676Sjpk 			return (ENOMEM);
1749*1676Sjpk 		ire->ire_gw_secattr = attrp;
1750*1676Sjpk 	} else {
1751*1676Sjpk 		exists = B_TRUE;
1752*1676Sjpk 		mutex_enter(&attrp->igsa_lock);
1753*1676Sjpk 
1754*1676Sjpk 		if (attrp->igsa_rhc != NULL) {
1755*1676Sjpk 			TNRHC_RELE(attrp->igsa_rhc);
1756*1676Sjpk 			attrp->igsa_rhc = NULL;
1757*1676Sjpk 		}
1758*1676Sjpk 
1759*1676Sjpk 		if (attrp->igsa_gc != NULL)
1760*1676Sjpk 			GC_REFRELE(attrp->igsa_gc);
1761*1676Sjpk 		if (attrp->igsa_gcgrp != NULL)
1762*1676Sjpk 			GCGRP_REFRELE(attrp->igsa_gcgrp);
1763*1676Sjpk 	}
1764*1676Sjpk 	ASSERT(!exists || MUTEX_HELD(&attrp->igsa_lock));
1765*1676Sjpk 
1766*1676Sjpk 	/*
1767*1676Sjpk 	 * References already held by caller and we keep them;
1768*1676Sjpk 	 * note that both gc and gcgrp may be set to NULL to
1769*1676Sjpk 	 * clear out igsa_gc and igsa_gcgrp, respectively.
1770*1676Sjpk 	 */
1771*1676Sjpk 	attrp->igsa_gc = gc;
1772*1676Sjpk 	attrp->igsa_gcgrp = gcgrp;
1773*1676Sjpk 
1774*1676Sjpk 	if (gcgrp == NULL && gc != NULL) {
1775*1676Sjpk 		gcgrp = gc->gc_grp;
1776*1676Sjpk 		ASSERT(gcgrp != NULL);
1777*1676Sjpk 	}
1778*1676Sjpk 
1779*1676Sjpk 	/*
1780*1676Sjpk 	 * Intialize the template for gateway; we use the gateway's
1781*1676Sjpk 	 * address found in either the passed in gateway credential
1782*1676Sjpk 	 * or group pointer, or the ire_gateway_addr{_v6} field.
1783*1676Sjpk 	 */
1784*1676Sjpk 	if (gcgrp != NULL) {
1785*1676Sjpk 		tsol_gcgrp_addr_t *ga = &gcgrp->gcgrp_addr;
1786*1676Sjpk 
1787*1676Sjpk 		/*
1788*1676Sjpk 		 * Caller is holding a reference, and that we don't
1789*1676Sjpk 		 * need to hold any lock to access the address.
1790*1676Sjpk 		 */
1791*1676Sjpk 		if (ipversion == IPV4_VERSION) {
1792*1676Sjpk 			ASSERT(ga->ga_af == AF_INET);
1793*1676Sjpk 			IN6_V4MAPPED_TO_IPADDR(&ga->ga_addr, ga_addr4);
1794*1676Sjpk 			paddr = &ga_addr4;
1795*1676Sjpk 		} else {
1796*1676Sjpk 			ASSERT(ga->ga_af == AF_INET6);
1797*1676Sjpk 			paddr = &ga->ga_addr;
1798*1676Sjpk 		}
1799*1676Sjpk 	} else if (ipversion == IPV6_VERSION &&
1800*1676Sjpk 	    !IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6)) {
1801*1676Sjpk 		paddr = &ire->ire_gateway_addr_v6;
1802*1676Sjpk 	} else if (ipversion == IPV4_VERSION &&
1803*1676Sjpk 	    ire->ire_gateway_addr != INADDR_ANY) {
1804*1676Sjpk 		paddr = &ire->ire_gateway_addr;
1805*1676Sjpk 	}
1806*1676Sjpk 
1807*1676Sjpk 	/*
1808*1676Sjpk 	 * Lookup the gateway template; note that we could get an internal
1809*1676Sjpk 	 * template here, which we cache anyway.  During IRE matching, we'll
1810*1676Sjpk 	 * try to update this gateway template cache and hopefully get a
1811*1676Sjpk 	 * real one.
1812*1676Sjpk 	 */
1813*1676Sjpk 	if (paddr != NULL) {
1814*1676Sjpk 		attrp->igsa_rhc = (ipversion == IPV4_VERSION) ?
1815*1676Sjpk 		    find_rhc_v4(paddr) : find_rhc_v6(paddr);
1816*1676Sjpk 	}
1817*1676Sjpk 
1818*1676Sjpk 	if (exists)
1819*1676Sjpk 		mutex_exit(&attrp->igsa_lock);
1820*1676Sjpk 
1821*1676Sjpk 	return (0);
1822*1676Sjpk }
1823*1676Sjpk 
1824*1676Sjpk /*
1825*1676Sjpk  * This function figures the type of MLP that we'll be using based on the
1826*1676Sjpk  * address that the user is binding and the zone.  If the address is
1827*1676Sjpk  * unspecified, then we're looking at both private and shared.  If it's one
1828*1676Sjpk  * of the zone's private addresses, then it's private only.  If it's one
1829*1676Sjpk  * of the global addresses, then it's shared only.
1830*1676Sjpk  *
1831*1676Sjpk  * If we can't figure out what it is, then return mlptSingle.  That's actually
1832*1676Sjpk  * an error case.
1833*1676Sjpk  */
1834*1676Sjpk mlp_type_t
1835*1676Sjpk tsol_mlp_addr_type(zoneid_t zoneid, uchar_t version, const void *addr)
1836*1676Sjpk {
1837*1676Sjpk 	in_addr_t in4;
1838*1676Sjpk 	ire_t *ire;
1839*1676Sjpk 	ipif_t *ipif;
1840*1676Sjpk 	zoneid_t addrzone;
1841*1676Sjpk 
1842*1676Sjpk 	ASSERT(addr != NULL);
1843*1676Sjpk 
1844*1676Sjpk 	if (version == IPV6_VERSION &&
1845*1676Sjpk 	    IN6_IS_ADDR_V4MAPPED((const in6_addr_t *)addr)) {
1846*1676Sjpk 		IN6_V4MAPPED_TO_IPADDR((const in6_addr_t *)addr, in4);
1847*1676Sjpk 		addr = &in4;
1848*1676Sjpk 		version = IPV4_VERSION;
1849*1676Sjpk 	}
1850*1676Sjpk 
1851*1676Sjpk 	if (version == IPV4_VERSION) {
1852*1676Sjpk 		in4 = *(const in_addr_t *)addr;
1853*1676Sjpk 		if (in4 == INADDR_ANY)
1854*1676Sjpk 			return (mlptBoth);
1855*1676Sjpk 		ire = ire_cache_lookup(in4, zoneid, NULL);
1856*1676Sjpk 	} else {
1857*1676Sjpk 		if (IN6_IS_ADDR_UNSPECIFIED((const in6_addr_t *)addr))
1858*1676Sjpk 			return (mlptBoth);
1859*1676Sjpk 		ire = ire_cache_lookup_v6(addr, zoneid, NULL);
1860*1676Sjpk 	}
1861*1676Sjpk 	/*
1862*1676Sjpk 	 * If we can't find the IRE, then we have to behave exactly like
1863*1676Sjpk 	 * ip_bind_laddr{,_v6}.  That means looking up the IPIF so that users
1864*1676Sjpk 	 * can bind to addresses on "down" interfaces.
1865*1676Sjpk 	 *
1866*1676Sjpk 	 * If we can't find that either, then the bind is going to fail, so
1867*1676Sjpk 	 * just give up.  Note that there's a miniscule chance that the address
1868*1676Sjpk 	 * is in transition, but we don't bother handling that.
1869*1676Sjpk 	 */
1870*1676Sjpk 	if (ire == NULL) {
1871*1676Sjpk 		if (version == IPV4_VERSION)
1872*1676Sjpk 			ipif = ipif_lookup_addr(*(const in_addr_t *)addr, NULL,
1873*1676Sjpk 			    zoneid, NULL, NULL, NULL, NULL);
1874*1676Sjpk 		else
1875*1676Sjpk 			ipif = ipif_lookup_addr_v6((const in6_addr_t *)addr,
1876*1676Sjpk 			    NULL, zoneid, NULL, NULL, NULL, NULL);
1877*1676Sjpk 		if (ipif == NULL)
1878*1676Sjpk 			return (mlptSingle);
1879*1676Sjpk 		addrzone = ipif->ipif_zoneid;
1880*1676Sjpk 		ipif_refrele(ipif);
1881*1676Sjpk 	} else {
1882*1676Sjpk 		addrzone = ire->ire_zoneid;
1883*1676Sjpk 		ire_refrele(ire);
1884*1676Sjpk 	}
1885*1676Sjpk 	return (addrzone == ALL_ZONES ? mlptShared : mlptPrivate);
1886*1676Sjpk }
1887*1676Sjpk 
1888*1676Sjpk /*
1889*1676Sjpk  * Since we are configuring local interfaces, and we know trusted
1890*1676Sjpk  * extension CDE requires local interfaces to be cipso host type in
1891*1676Sjpk  * order to function correctly, we'll associate a cipso template
1892*1676Sjpk  * to each local interface and let the interface come up.  Configuring
1893*1676Sjpk  * a local interface to be "unlabeled" host type is a configuration error.
1894*1676Sjpk  * We'll override that error and make the interface host type to be cipso
1895*1676Sjpk  * here.
1896*1676Sjpk  *
1897*1676Sjpk  * The code is optimized for the usual "success" case and unwinds things on
1898*1676Sjpk  * error.  We don't want to go to the trouble and expense of formatting the
1899*1676Sjpk  * interface name for the usual case where everything is configured correctly.
1900*1676Sjpk  */
1901*1676Sjpk boolean_t
1902*1676Sjpk tsol_check_interface_address(const ipif_t *ipif)
1903*1676Sjpk {
1904*1676Sjpk 	tsol_tpc_t *tp;
1905*1676Sjpk 	char addrbuf[INET6_ADDRSTRLEN];
1906*1676Sjpk 	int af;
1907*1676Sjpk 	const void *addr;
1908*1676Sjpk 	zone_t *zone;
1909*1676Sjpk 	ts_label_t *plabel;
1910*1676Sjpk 	const bslabel_t *label;
1911*1676Sjpk 	char ifbuf[LIFNAMSIZ + 10];
1912*1676Sjpk 	const char *ifname;
1913*1676Sjpk 	boolean_t retval;
1914*1676Sjpk 	tsol_rhent_t rhent;
1915*1676Sjpk 
1916*1676Sjpk 	if (IN6_IS_ADDR_V4MAPPED(&ipif->ipif_v6lcl_addr)) {
1917*1676Sjpk 		af = AF_INET;
1918*1676Sjpk 		addr = &V4_PART_OF_V6(ipif->ipif_v6lcl_addr);
1919*1676Sjpk 	} else {
1920*1676Sjpk 		af = AF_INET6;
1921*1676Sjpk 		addr = &ipif->ipif_v6lcl_addr;
1922*1676Sjpk 	}
1923*1676Sjpk 
1924*1676Sjpk 	tp = find_tpc(&ipif->ipif_v6lcl_addr, IPV6_VERSION, B_FALSE);
1925*1676Sjpk 	zone = ipif->ipif_zoneid == ALL_ZONES ? NULL :
1926*1676Sjpk 	    zone_find_by_id(ipif->ipif_zoneid);
1927*1676Sjpk 	if (zone != NULL) {
1928*1676Sjpk 		plabel = zone->zone_slabel;
1929*1676Sjpk 		ASSERT(plabel != NULL);
1930*1676Sjpk 		label = label2bslabel(plabel);
1931*1676Sjpk 	}
1932*1676Sjpk 
1933*1676Sjpk 	/*
1934*1676Sjpk 	 * If it's CIPSO and an all-zones address, then we're done.
1935*1676Sjpk 	 * If it's a CIPSO zone specific address, the zone's label
1936*1676Sjpk 	 * must be in the range or set specified in the template.
1937*1676Sjpk 	 * When the remote host entry is missing or the template
1938*1676Sjpk 	 * type is incorrect for this interface, we create a
1939*1676Sjpk 	 * CIPSO host entry in kernel and allow the interface to be
1940*1676Sjpk 	 * brought up as CIPSO type.
1941*1676Sjpk 	 */
1942*1676Sjpk 	if (tp != NULL && (
1943*1676Sjpk 	    /* The all-zones case */
1944*1676Sjpk 	    (tp->tpc_tp.host_type == SUN_CIPSO &&
1945*1676Sjpk 	    tp->tpc_tp.tp_doi == default_doi &&
1946*1676Sjpk 	    ipif->ipif_zoneid == ALL_ZONES) ||
1947*1676Sjpk 	    /* The local-zone case */
1948*1676Sjpk 	    (zone != NULL && plabel->tsl_doi == tp->tpc_tp.tp_doi &&
1949*1676Sjpk 	    ((tp->tpc_tp.host_type == SUN_CIPSO &&
1950*1676Sjpk 	    (_blinrange(label, &tp->tpc_tp.tp_sl_range_cipso) ||
1951*1676Sjpk 	    blinlset(label, tp->tpc_tp.tp_sl_set_cipso))))))) {
1952*1676Sjpk 		if (zone != NULL)
1953*1676Sjpk 			zone_rele(zone);
1954*1676Sjpk 		TPC_RELE(tp);
1955*1676Sjpk 		return (B_TRUE);
1956*1676Sjpk 	}
1957*1676Sjpk 
1958*1676Sjpk 	ifname = ipif->ipif_ill->ill_name;
1959*1676Sjpk 	if (ipif->ipif_id != 0) {
1960*1676Sjpk 		(void) snprintf(ifbuf, sizeof (ifbuf), "%s:%u", ifname,
1961*1676Sjpk 		    ipif->ipif_id);
1962*1676Sjpk 		ifname = ifbuf;
1963*1676Sjpk 	}
1964*1676Sjpk 	(void) inet_ntop(af, addr, addrbuf, sizeof (addrbuf));
1965*1676Sjpk 
1966*1676Sjpk 	if (tp == NULL) {
1967*1676Sjpk 		cmn_err(CE_NOTE, "template entry for %s missing. Default to "
1968*1676Sjpk 		    "CIPSO type for %s", ifname, addrbuf);
1969*1676Sjpk 		retval = B_TRUE;
1970*1676Sjpk 	} else if (tp->tpc_tp.host_type == UNLABELED) {
1971*1676Sjpk 		cmn_err(CE_NOTE, "template type for %s incorrectly configured. "
1972*1676Sjpk 		    "Change to CIPSO type for %s", ifname, addrbuf);
1973*1676Sjpk 		retval = B_TRUE;
1974*1676Sjpk 	} else if (ipif->ipif_zoneid == ALL_ZONES) {
1975*1676Sjpk 		if (tp->tpc_tp.host_type != SUN_CIPSO) {
1976*1676Sjpk 			cmn_err(CE_NOTE, "%s failed: %s isn't set to CIPSO for "
1977*1676Sjpk 			    "all-zones. Converted to CIPSO.", ifname, addrbuf);
1978*1676Sjpk 			retval = B_TRUE;
1979*1676Sjpk 		} else {
1980*1676Sjpk 			cmn_err(CE_NOTE, "%s failed: %s has wrong DOI %d "
1981*1676Sjpk 			    "instead of %d", ifname, addrbuf,
1982*1676Sjpk 			    tp->tpc_tp.tp_doi, default_doi);
1983*1676Sjpk 			retval = B_FALSE;
1984*1676Sjpk 		}
1985*1676Sjpk 	} else if (zone == NULL) {
1986*1676Sjpk 		cmn_err(CE_NOTE, "%s failed: zoneid %d unknown",
1987*1676Sjpk 		    ifname, ipif->ipif_zoneid);
1988*1676Sjpk 		retval = B_FALSE;
1989*1676Sjpk 	} else if (plabel->tsl_doi != tp->tpc_tp.tp_doi) {
1990*1676Sjpk 		cmn_err(CE_NOTE, "%s failed: zone %s has DOI %d but %s has "
1991*1676Sjpk 		    "DOI %d", ifname, zone->zone_name, plabel->tsl_doi,
1992*1676Sjpk 		    addrbuf, tp->tpc_tp.tp_doi);
1993*1676Sjpk 		retval = B_FALSE;
1994*1676Sjpk 	} else {
1995*1676Sjpk 		cmn_err(CE_NOTE, "%s failed: zone %s label incompatible with "
1996*1676Sjpk 		    "%s", ifname, zone->zone_name, addrbuf);
1997*1676Sjpk 		tsol_print_label(label, "zone label");
1998*1676Sjpk 		retval = B_FALSE;
1999*1676Sjpk 	}
2000*1676Sjpk 
2001*1676Sjpk 	if (zone != NULL)
2002*1676Sjpk 		zone_rele(zone);
2003*1676Sjpk 	if (tp != NULL)
2004*1676Sjpk 		TPC_RELE(tp);
2005*1676Sjpk 	if (retval) {
2006*1676Sjpk 		/*
2007*1676Sjpk 		 * we've corrected a config error and let the interface
2008*1676Sjpk 		 * come up as cipso. Need to insert an rhent.
2009*1676Sjpk 		 */
2010*1676Sjpk 		if ((rhent.rh_address.ta_family = af) == AF_INET) {
2011*1676Sjpk 			rhent.rh_prefix = 32;
2012*1676Sjpk 			rhent.rh_address.ta_addr_v4 = *(struct in_addr *)addr;
2013*1676Sjpk 		} else {
2014*1676Sjpk 			rhent.rh_prefix = 128;
2015*1676Sjpk 			rhent.rh_address.ta_addr_v6 = *(in6_addr_t *)addr;
2016*1676Sjpk 		}
2017*1676Sjpk 		(void) strcpy(rhent.rh_template, "cipso");
2018*1676Sjpk 		if (tnrh_load(&rhent) != 0) {
2019*1676Sjpk 			cmn_err(CE_NOTE, "%s failed: Cannot insert CIPSO "
2020*1676Sjpk 			    "template for local addr %s", ifname, addrbuf);
2021*1676Sjpk 			retval = B_FALSE;
2022*1676Sjpk 		}
2023*1676Sjpk 	}
2024*1676Sjpk 	return (retval);
2025*1676Sjpk }
2026