xref: /onnv-gate/usr/src/uts/common/inet/ip/tn_ipopt.c (revision 1676:37f4a3e2bd99)
1*1676Sjpk /*
2*1676Sjpk  * CDDL HEADER START
3*1676Sjpk  *
4*1676Sjpk  * The contents of this file are subject to the terms of the
5*1676Sjpk  * Common Development and Distribution License (the "License").
6*1676Sjpk  * You may not use this file except in compliance with the License.
7*1676Sjpk  *
8*1676Sjpk  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*1676Sjpk  * or http://www.opensolaris.org/os/licensing.
10*1676Sjpk  * See the License for the specific language governing permissions
11*1676Sjpk  * and limitations under the License.
12*1676Sjpk  *
13*1676Sjpk  * When distributing Covered Code, include this CDDL HEADER in each
14*1676Sjpk  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*1676Sjpk  * If applicable, add the following below this CDDL HEADER, with the
16*1676Sjpk  * fields enclosed by brackets "[]" replaced with your own identifying
17*1676Sjpk  * information: Portions Copyright [yyyy] [name of copyright owner]
18*1676Sjpk  *
19*1676Sjpk  * CDDL HEADER END
20*1676Sjpk  */
21*1676Sjpk /*
22*1676Sjpk  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23*1676Sjpk  * Use is subject to license terms.
24*1676Sjpk  */
25*1676Sjpk 
26*1676Sjpk #pragma ident	"%Z%%M%	%I%	%E% SMI"
27*1676Sjpk 
28*1676Sjpk #include <sys/types.h>
29*1676Sjpk #include <sys/systm.h>
30*1676Sjpk #include <sys/kmem.h>
31*1676Sjpk #include <sys/disp.h>
32*1676Sjpk #include <sys/stream.h>
33*1676Sjpk #include <sys/strsubr.h>
34*1676Sjpk #include <sys/strsun.h>
35*1676Sjpk #include <sys/policy.h>
36*1676Sjpk #include <sys/tsol/label_macro.h>
37*1676Sjpk #include <sys/tsol/tndb.h>
38*1676Sjpk #include <sys/tsol/tnet.h>
39*1676Sjpk #include <inet/ip.h>
40*1676Sjpk #include <inet/ip6.h>
41*1676Sjpk #include <inet/tcp.h>
42*1676Sjpk #include <inet/ipclassifier.h>
43*1676Sjpk #include <inet/ip_ire.h>
44*1676Sjpk 
45*1676Sjpk /*
46*1676Sjpk  * This routine takes a sensitivity label as input and creates a CIPSO
47*1676Sjpk  * option in the specified buffer.  It returns the size of the CIPSO option.
48*1676Sjpk  * If the sensitivity label is too large for the CIPSO option, then 0
49*1676Sjpk  * is returned.
50*1676Sjpk  *
51*1676Sjpk  * tsol2cipso_tt1 returns 0 for failure and greater than 0 for success
52*1676Sjpk  * (more accurately, success means a return value between 10 and 40).
53*1676Sjpk  */
54*1676Sjpk 
55*1676Sjpk static int
56*1676Sjpk tsol2cipso_tt1(const bslabel_t *sl, unsigned char *cop, uint32_t doi)
57*1676Sjpk {
58*1676Sjpk 	struct cipso_tag_type_1 *tt1;
59*1676Sjpk 	const _bslabel_impl_t *bsl;
60*1676Sjpk 	const uchar_t *ucp;
61*1676Sjpk 	int i;
62*1676Sjpk 
63*1676Sjpk 	if (doi == 0)
64*1676Sjpk 		return (0);
65*1676Sjpk 
66*1676Sjpk 	/* check for Admin High sensitivity label */
67*1676Sjpk 	if (blequal(sl, label2bslabel(l_admin_high)))
68*1676Sjpk 		return (0);
69*1676Sjpk 
70*1676Sjpk 	/* check whether classification will fit in one octet */
71*1676Sjpk 	bsl = (const _bslabel_impl_t *)sl;
72*1676Sjpk 	if (LCLASS(bsl) & 0xFF00)
73*1676Sjpk 		return (0);
74*1676Sjpk 
75*1676Sjpk 	/*
76*1676Sjpk 	 * Check whether compartments will fit in 30 octets.
77*1676Sjpk 	 * Compartments 241 - 256 are not allowed.
78*1676Sjpk 	 */
79*1676Sjpk 	if (ntohl(bsl->compartments.c8) & 0x0000FFFF)
80*1676Sjpk 		return (0);
81*1676Sjpk 
82*1676Sjpk 	/*
83*1676Sjpk 	 * Compute option length and tag length.
84*1676Sjpk 	 * 'p' points to the last two bytes in the Sensitivity Label's
85*1676Sjpk 	 * compartments; these cannot be mapped into CIPSO compartments.
86*1676Sjpk 	 */
87*1676Sjpk 	ucp = (const uchar_t *)&bsl->compartments.c8 + 2;
88*1676Sjpk 	while (--ucp >= (const uchar_t *)&bsl->compartments.c1)
89*1676Sjpk 		if (*ucp != 0)
90*1676Sjpk 			break;
91*1676Sjpk 
92*1676Sjpk 	i =  ucp - (const uchar_t *)&bsl->compartments.c1 + 1;
93*1676Sjpk 
94*1676Sjpk 	if (cop == NULL)
95*1676Sjpk 		return (10 + i);
96*1676Sjpk 
97*1676Sjpk 	doi = htonl(doi);
98*1676Sjpk 	ucp = (const uchar_t *)&doi;
99*1676Sjpk 	cop[IPOPT_OPTVAL] = IPOPT_COMSEC;
100*1676Sjpk 	cop[IPOPT_OLEN] = 10 + i;
101*1676Sjpk 	cop[IPOPT_OLEN+1] = ucp[0];
102*1676Sjpk 	cop[IPOPT_OLEN+2] = ucp[1];
103*1676Sjpk 	cop[IPOPT_OLEN+3] = ucp[2];
104*1676Sjpk 	cop[IPOPT_OLEN+4] = ucp[3];
105*1676Sjpk 	tt1 = (struct cipso_tag_type_1 *)&cop[IPOPT_OLEN + 5];
106*1676Sjpk 	tt1->tag_type = 1;
107*1676Sjpk 	tt1->tag_align = 0;
108*1676Sjpk 	tt1->tag_sl = LCLASS(bsl);
109*1676Sjpk 	tt1->tag_length = 4 + i;
110*1676Sjpk 
111*1676Sjpk 	bcopy(&bsl->compartments.c1, tt1->tag_cat, i);
112*1676Sjpk 
113*1676Sjpk 	return (cop[IPOPT_OLEN]);
114*1676Sjpk }
115*1676Sjpk 
116*1676Sjpk /*
117*1676Sjpk  * The following routine copies a datagram's option into the specified buffer
118*1676Sjpk  * (if buffer pointer is non-null), or returns a pointer to the label within
119*1676Sjpk  * the streams message (if buffer is null).  In both cases, tsol_get_option
120*1676Sjpk  * returns the option's type.
121*1676Sjpk  *
122*1676Sjpk  * tsol_get_option assumes that the specified buffer is large enough to
123*1676Sjpk  * hold the largest valid CIPSO option.  Since the total number of
124*1676Sjpk  * IP header options cannot exceed 40 bytes, a 40 byte buffer is a good choice.
125*1676Sjpk  */
126*1676Sjpk 
127*1676Sjpk tsol_ip_label_t
128*1676Sjpk tsol_get_option(mblk_t *mp, uchar_t **buffer)
129*1676Sjpk {
130*1676Sjpk 	ipha_t	*ipha;
131*1676Sjpk 	uchar_t	*opt;
132*1676Sjpk 	uint32_t	totallen;
133*1676Sjpk 	uint32_t	optval;
134*1676Sjpk 	uint32_t	optlen;
135*1676Sjpk 
136*1676Sjpk 	ipha = (ipha_t *)mp->b_rptr;
137*1676Sjpk 
138*1676Sjpk 	/*
139*1676Sjpk 	 * Get length (in 4 byte octets) of IP header options.
140*1676Sjpk 	 * If header doesn't contain options, then return OPT_NONE.
141*1676Sjpk 	 */
142*1676Sjpk 	totallen = ipha->ipha_version_and_hdr_length -
143*1676Sjpk 	    (uint8_t)((IP_VERSION << 4) + IP_SIMPLE_HDR_LENGTH_IN_WORDS);
144*1676Sjpk 
145*1676Sjpk 	if (totallen == 0)
146*1676Sjpk 		return (OPT_NONE);
147*1676Sjpk 
148*1676Sjpk 	totallen <<= 2;
149*1676Sjpk 
150*1676Sjpk 	/*
151*1676Sjpk 	 * Search for CIPSO option.
152*1676Sjpk 	 * If no such option is present, then return OPT_NONE.
153*1676Sjpk 	 */
154*1676Sjpk 	opt = (uchar_t *)&ipha[1];
155*1676Sjpk 	while (totallen != 0) {
156*1676Sjpk 		switch (optval = opt[IPOPT_OPTVAL]) {
157*1676Sjpk 		case IPOPT_EOL:
158*1676Sjpk 			return (OPT_NONE);
159*1676Sjpk 		case IPOPT_NOP:
160*1676Sjpk 			optlen = 1;
161*1676Sjpk 			break;
162*1676Sjpk 		default:
163*1676Sjpk 			if (totallen <= IPOPT_OLEN)
164*1676Sjpk 				return (OPT_NONE);
165*1676Sjpk 			optlen = opt[IPOPT_OLEN];
166*1676Sjpk 			if (optlen < 2)
167*1676Sjpk 				return (OPT_NONE);
168*1676Sjpk 		}
169*1676Sjpk 		if (optlen > totallen)
170*1676Sjpk 			return (OPT_NONE);
171*1676Sjpk 		/*
172*1676Sjpk 		 * Copy pointer to option into '*buffer' and
173*1676Sjpk 		 * return the option type.
174*1676Sjpk 		 */
175*1676Sjpk 		switch (optval) {
176*1676Sjpk 		case IPOPT_COMSEC:
177*1676Sjpk 			*buffer = opt;
178*1676Sjpk 			if (TSOL_CIPSO_TAG_OFFSET < optlen &&
179*1676Sjpk 			    opt[TSOL_CIPSO_TAG_OFFSET] == 1)
180*1676Sjpk 				return (OPT_CIPSO);
181*1676Sjpk 			return (OPT_NONE);
182*1676Sjpk 		}
183*1676Sjpk 		totallen -= optlen;
184*1676Sjpk 		opt += optlen;
185*1676Sjpk 	}
186*1676Sjpk 	return (OPT_NONE);
187*1676Sjpk }
188*1676Sjpk 
189*1676Sjpk /*
190*1676Sjpk  * tsol_compute_label()
191*1676Sjpk  *
192*1676Sjpk  * This routine computes the IP label that should be on a packet based on the
193*1676Sjpk  * connection and destination information.
194*1676Sjpk  *
195*1676Sjpk  * Returns:
196*1676Sjpk  *      0		Fetched label
197*1676Sjpk  *      EACCES		The packet failed the remote host accreditation
198*1676Sjpk  *      ENOMEM		Memory allocation failure
199*1676Sjpk  *	EINVAL		Label cannot be computed
200*1676Sjpk  */
201*1676Sjpk int
202*1676Sjpk tsol_compute_label(const cred_t *credp, ipaddr_t dst, uchar_t *opt_storage,
203*1676Sjpk     boolean_t isexempt)
204*1676Sjpk {
205*1676Sjpk 	uint_t		sec_opt_len;
206*1676Sjpk 	ts_label_t	*tsl;
207*1676Sjpk 	tsol_tpc_t	*dst_rhtp;
208*1676Sjpk 	ire_t		*ire, *sire = NULL;
209*1676Sjpk 	boolean_t	compute_label = B_FALSE;
210*1676Sjpk 	tsol_ire_gw_secattr_t *attrp;
211*1676Sjpk 	zoneid_t	zoneid;
212*1676Sjpk 
213*1676Sjpk 	if (opt_storage != NULL)
214*1676Sjpk 		opt_storage[IPOPT_OLEN] = 0;
215*1676Sjpk 
216*1676Sjpk 	if ((tsl = crgetlabel(credp)) == NULL)
217*1676Sjpk 		return (0);
218*1676Sjpk 
219*1676Sjpk 	/* always pass multicast */
220*1676Sjpk 	if (CLASSD(dst))
221*1676Sjpk 		return (0);
222*1676Sjpk 
223*1676Sjpk 	if ((dst_rhtp = find_tpc(&dst, IPV4_VERSION, B_FALSE)) == NULL) {
224*1676Sjpk 		DTRACE_PROBE3(tx__tnopt__log__info__labeling__lookupdst__v4,
225*1676Sjpk 		    char *, "destination ip(1) not in database (with creds(2))",
226*1676Sjpk 		    ipaddr_t, dst, cred_t *, credp);
227*1676Sjpk 		return (EINVAL);
228*1676Sjpk 	}
229*1676Sjpk 
230*1676Sjpk 	zoneid = crgetzoneid(credp);
231*1676Sjpk 
232*1676Sjpk 	switch (dst_rhtp->tpc_tp.host_type) {
233*1676Sjpk 	case UNLABELED:
234*1676Sjpk 		/*
235*1676Sjpk 		 * Only add a label if the unlabeled destination is
236*1676Sjpk 		 * not broadcast/local/loopback address, that it is
237*1676Sjpk 		 * not on the same subnet, and that the next-hop
238*1676Sjpk 		 * gateway is labeled.
239*1676Sjpk 		 */
240*1676Sjpk 		ire = ire_cache_lookup(dst, zoneid, tsl);
241*1676Sjpk 
242*1676Sjpk 		if (ire != NULL && (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL |
243*1676Sjpk 		    IRE_LOOPBACK | IRE_INTERFACE)) != 0) {
244*1676Sjpk 			IRE_REFRELE(ire);
245*1676Sjpk 			TPC_RELE(dst_rhtp);
246*1676Sjpk 			return (0);
247*1676Sjpk 		} else if (ire == NULL) {
248*1676Sjpk 			ire = ire_ftable_lookup(dst, 0, 0, 0, NULL, &sire,
249*1676Sjpk 			    zoneid, 0, tsl, (MATCH_IRE_RECURSIVE |
250*1676Sjpk 			    MATCH_IRE_DEFAULT | MATCH_IRE_SECATTR));
251*1676Sjpk 		}
252*1676Sjpk 
253*1676Sjpk 		/* no route to destination */
254*1676Sjpk 		if (ire == NULL) {
255*1676Sjpk 			DTRACE_PROBE4(
256*1676Sjpk 			    tx__tnopt__log__info__labeling__routedst__v4,
257*1676Sjpk 			    char *, "No route to unlabeled dest ip(1)/tpc(2) "
258*1676Sjpk 			    "with creds(3).", ipaddr_t, dst, tsol_tpc_t *,
259*1676Sjpk 			    dst_rhtp, cred_t *, credp);
260*1676Sjpk 			TPC_RELE(dst_rhtp);
261*1676Sjpk 			return (EINVAL);
262*1676Sjpk 		}
263*1676Sjpk 
264*1676Sjpk 		/*
265*1676Sjpk 		 * Prefix IRE from f-table lookup means that the destination
266*1676Sjpk 		 * is not directly connected; check the next-hop attributes.
267*1676Sjpk 		 */
268*1676Sjpk 		if (sire != NULL) {
269*1676Sjpk 			ASSERT(ire != NULL);
270*1676Sjpk 			IRE_REFRELE(ire);
271*1676Sjpk 			ire = sire;
272*1676Sjpk 		}
273*1676Sjpk 
274*1676Sjpk 		attrp = ire->ire_gw_secattr;
275*1676Sjpk 		if (attrp != NULL && attrp->igsa_rhc != NULL &&
276*1676Sjpk 		    attrp->igsa_rhc->rhc_tpc->tpc_tp.host_type != UNLABELED)
277*1676Sjpk 			compute_label = B_TRUE;
278*1676Sjpk 
279*1676Sjpk 		/*
280*1676Sjpk 		 * Can talk to unlabeled hosts if
281*1676Sjpk 		 * (1) zone's label matches the default label, or
282*1676Sjpk 		 * (2) SO_MAC_EXEMPT is on and we dominate the peer's label
283*1676Sjpk 		 * (3) SO_MAC_EXEMPT is on and this is the global zone
284*1676Sjpk 		 */
285*1676Sjpk 		if (dst_rhtp->tpc_tp.tp_doi != tsl->tsl_doi ||
286*1676Sjpk 		    (!blequal(&dst_rhtp->tpc_tp.tp_def_label,
287*1676Sjpk 		    &tsl->tsl_label) && (!isexempt ||
288*1676Sjpk 		    (zoneid != GLOBAL_ZONEID && !bldominates(&tsl->tsl_label,
289*1676Sjpk 		    &dst_rhtp->tpc_tp.tp_def_label))))) {
290*1676Sjpk 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__mac__v4,
291*1676Sjpk 			    char *, "unlabeled dest ip(1)/tpc(2) "
292*1676Sjpk 			    "non-matching creds(3).", ipaddr_t, dst,
293*1676Sjpk 			    tsol_tpc_t *, dst_rhtp, cred_t *, credp);
294*1676Sjpk 			IRE_REFRELE(ire);
295*1676Sjpk 			TPC_RELE(dst_rhtp);
296*1676Sjpk 			return (EACCES);
297*1676Sjpk 		}
298*1676Sjpk 
299*1676Sjpk 		IRE_REFRELE(ire);
300*1676Sjpk 		break;
301*1676Sjpk 
302*1676Sjpk 	case SUN_CIPSO:
303*1676Sjpk 		/*
304*1676Sjpk 		 * Can talk to labeled hosts if zone's label is within target's
305*1676Sjpk 		 * label range or set.
306*1676Sjpk 		 */
307*1676Sjpk 		if (dst_rhtp->tpc_tp.tp_cipso_doi_cipso != tsl->tsl_doi ||
308*1676Sjpk 		    (!_blinrange(&tsl->tsl_label,
309*1676Sjpk 		    &dst_rhtp->tpc_tp.tp_sl_range_cipso) &&
310*1676Sjpk 		    !blinlset(&tsl->tsl_label,
311*1676Sjpk 		    dst_rhtp->tpc_tp.tp_sl_set_cipso))) {
312*1676Sjpk 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__mac__v4,
313*1676Sjpk 			    char *, "labeled dest ip(1)/tpc(2) "
314*1676Sjpk 			    "non-matching creds(3).", ipaddr_t, dst,
315*1676Sjpk 			    tsol_tpc_t *, dst_rhtp, cred_t *, credp);
316*1676Sjpk 			TPC_RELE(dst_rhtp);
317*1676Sjpk 			return (EACCES);
318*1676Sjpk 		}
319*1676Sjpk 		compute_label = B_TRUE;
320*1676Sjpk 		break;
321*1676Sjpk 
322*1676Sjpk 	default:
323*1676Sjpk 		TPC_RELE(dst_rhtp);
324*1676Sjpk 		return (EACCES);
325*1676Sjpk 	}
326*1676Sjpk 
327*1676Sjpk 	if (!compute_label) {
328*1676Sjpk 		TPC_RELE(dst_rhtp);
329*1676Sjpk 		return (0);
330*1676Sjpk 	}
331*1676Sjpk 
332*1676Sjpk 	/* compute the CIPSO option */
333*1676Sjpk 	if (dst_rhtp->tpc_tp.host_type != UNLABELED)
334*1676Sjpk 		sec_opt_len = tsol2cipso_tt1(&tsl->tsl_label, opt_storage,
335*1676Sjpk 		    tsl->tsl_doi);
336*1676Sjpk 	else
337*1676Sjpk 		sec_opt_len = tsol2cipso_tt1(&dst_rhtp->tpc_tp.tp_def_label,
338*1676Sjpk 		    opt_storage, tsl->tsl_doi);
339*1676Sjpk 	TPC_RELE(dst_rhtp);
340*1676Sjpk 
341*1676Sjpk 	if (sec_opt_len == 0) {
342*1676Sjpk 		DTRACE_PROBE4(tx__tnopt__log__error__labeling__lostops__v4,
343*1676Sjpk 		    char *,
344*1676Sjpk 		    "options lack length for dest ip(1)/tpc(2) with creds(3).",
345*1676Sjpk 		    ipaddr_t, dst, tsol_tpc_t *, dst_rhtp, cred_t *, credp);
346*1676Sjpk 		return (EINVAL);
347*1676Sjpk 	}
348*1676Sjpk 
349*1676Sjpk 	return (0);
350*1676Sjpk }
351*1676Sjpk 
352*1676Sjpk /*
353*1676Sjpk  * Remove any existing security option (CIPSO) from the given IP
354*1676Sjpk  * header, move the 'buflen' bytes back to fill the gap, and return the number
355*1676Sjpk  * of bytes removed (as zero or negative number).  Assumes that the headers are
356*1676Sjpk  * sane.
357*1676Sjpk  */
358*1676Sjpk int
359*1676Sjpk tsol_remove_secopt(ipha_t *ipha, int buflen)
360*1676Sjpk {
361*1676Sjpk 	int remlen, olen, oval, delta;
362*1676Sjpk 	uchar_t *fptr, *tptr;
363*1676Sjpk 	boolean_t noop_keep;
364*1676Sjpk 
365*1676Sjpk 	remlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
366*1676Sjpk 	fptr = tptr = (uchar_t *)(ipha + 1);
367*1676Sjpk 	noop_keep = B_TRUE;
368*1676Sjpk 	while (remlen > 0) {
369*1676Sjpk 		oval = fptr[IPOPT_OPTVAL];
370*1676Sjpk 
371*1676Sjpk 		/* terminate on end of list */
372*1676Sjpk 		if (oval == IPOPT_EOL)
373*1676Sjpk 			break;
374*1676Sjpk 
375*1676Sjpk 		/*
376*1676Sjpk 		 * Delete any no-ops following a deleted option, at least up
377*1676Sjpk 		 * to a 4 octet alignment; copy others.
378*1676Sjpk 		 */
379*1676Sjpk 		if (oval == IPOPT_NOP) {
380*1676Sjpk 			if (((fptr - (uchar_t *)ipha) & 3) == 0)
381*1676Sjpk 				noop_keep = B_TRUE;
382*1676Sjpk 			if (noop_keep)
383*1676Sjpk 				*tptr++ = oval;
384*1676Sjpk 			fptr++;
385*1676Sjpk 			remlen--;
386*1676Sjpk 			continue;
387*1676Sjpk 		}
388*1676Sjpk 
389*1676Sjpk 		/* stop on corrupted list; just do nothing. */
390*1676Sjpk 		if (remlen < 2)
391*1676Sjpk 			return (0);
392*1676Sjpk 		olen = fptr[IPOPT_OLEN];
393*1676Sjpk 		if (olen < 2 || olen > remlen)
394*1676Sjpk 			return (0);
395*1676Sjpk 
396*1676Sjpk 		/* skip over security options to delete them */
397*1676Sjpk 		if (oval == IPOPT_COMSEC || oval == IPOPT_SECURITY) {
398*1676Sjpk 			noop_keep = B_FALSE;
399*1676Sjpk 			fptr += olen;
400*1676Sjpk 			remlen -= olen;
401*1676Sjpk 			continue;
402*1676Sjpk 		}
403*1676Sjpk 
404*1676Sjpk 		/* copy the rest */
405*1676Sjpk 		noop_keep = B_TRUE;
406*1676Sjpk 		if (tptr != fptr)
407*1676Sjpk 			ovbcopy(fptr, tptr, olen);
408*1676Sjpk 		fptr += olen;
409*1676Sjpk 		tptr += olen;
410*1676Sjpk 		remlen -= olen;
411*1676Sjpk 	}
412*1676Sjpk 
413*1676Sjpk 	fptr += remlen;
414*1676Sjpk 
415*1676Sjpk 	/* figure how much padding we'll need for header alignment */
416*1676Sjpk 	olen = (tptr - (uchar_t *)ipha) & 3;
417*1676Sjpk 	if (olen > 0) {
418*1676Sjpk 		olen = 4 - olen;
419*1676Sjpk 		/* pad with end-of-list */
420*1676Sjpk 		bzero(tptr, olen);
421*1676Sjpk 		tptr += olen;
422*1676Sjpk 	}
423*1676Sjpk 
424*1676Sjpk 	/* slide back the headers that follow and update the IP header */
425*1676Sjpk 	delta = fptr - tptr;
426*1676Sjpk 	if (delta != 0) {
427*1676Sjpk 		ovbcopy(fptr, tptr, ((uchar_t *)ipha + buflen) - fptr);
428*1676Sjpk 		ipha->ipha_version_and_hdr_length -= delta / 4;
429*1676Sjpk 	}
430*1676Sjpk 	return (-delta);
431*1676Sjpk }
432*1676Sjpk 
433*1676Sjpk /*
434*1676Sjpk  * Insert the option in 'optbuf' into the IP header pointed to by 'ipha', and
435*1676Sjpk  * move the data following the IP header (up to buflen) to accomodate the new
436*1676Sjpk  * option.  Assumes that up to IP_MAX_OPT_LENGTH bytes are available (in total)
437*1676Sjpk  * for IP options.  Returns the number of bytes actually inserted, or -1 if the
438*1676Sjpk  * option cannot be inserted.  (Note that negative return values are possible
439*1676Sjpk  * when noops must be compressed, and that only -1 indicates error.  Successful
440*1676Sjpk  * return value is always evenly divisible by 4, by definition.)
441*1676Sjpk  */
442*1676Sjpk int
443*1676Sjpk tsol_prepend_option(uchar_t *optbuf, ipha_t *ipha, int buflen)
444*1676Sjpk {
445*1676Sjpk 	int remlen, padding, lastpad, totlen;
446*1676Sjpk 	int oval, olen;
447*1676Sjpk 	int delta;
448*1676Sjpk 	uchar_t *optr;
449*1676Sjpk 	uchar_t tempopt[IP_MAX_OPT_LENGTH], *toptr;
450*1676Sjpk 
451*1676Sjpk 	if (optbuf[IPOPT_OPTVAL] == IPOPT_EOL ||
452*1676Sjpk 	    optbuf[IPOPT_OPTVAL] == IPOPT_NOP ||
453*1676Sjpk 	    optbuf[IPOPT_OLEN] == 0)
454*1676Sjpk 		return (0);
455*1676Sjpk 
456*1676Sjpk 	ASSERT(optbuf[IPOPT_OLEN] >= 2 &&
457*1676Sjpk 	    optbuf[IPOPT_OLEN] <= IP_MAX_OPT_LENGTH);
458*1676Sjpk 
459*1676Sjpk 	/* first find the real (unpadded) length of the existing options */
460*1676Sjpk 	remlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
461*1676Sjpk 	padding = totlen = lastpad = 0;
462*1676Sjpk 	optr = (uchar_t *)(ipha + 1);
463*1676Sjpk 	while (remlen > 0) {
464*1676Sjpk 		oval = optr[IPOPT_OPTVAL];
465*1676Sjpk 
466*1676Sjpk 		/* stop at end of list */
467*1676Sjpk 		if (oval == IPOPT_EOL)
468*1676Sjpk 			break;
469*1676Sjpk 
470*1676Sjpk 		/* skip no-ops, noting that length byte isn't present */
471*1676Sjpk 		if (oval == IPOPT_NOP) {
472*1676Sjpk 			optr++;
473*1676Sjpk 			padding++;
474*1676Sjpk 			lastpad++;
475*1676Sjpk 			totlen++;
476*1676Sjpk 			remlen--;
477*1676Sjpk 			continue;
478*1676Sjpk 		}
479*1676Sjpk 
480*1676Sjpk 		/* give up on a corrupted list; report failure */
481*1676Sjpk 		if (remlen < 2)
482*1676Sjpk 			return (-1);
483*1676Sjpk 		olen = optr[IPOPT_OLEN];
484*1676Sjpk 		if (olen < 2 || olen > remlen)
485*1676Sjpk 			return (-1);
486*1676Sjpk 
487*1676Sjpk 		lastpad = 0;
488*1676Sjpk 		optr += olen;
489*1676Sjpk 		totlen += olen;
490*1676Sjpk 		remlen -= olen;
491*1676Sjpk 	}
492*1676Sjpk 
493*1676Sjpk 	/* completely ignore any trailing padding */
494*1676Sjpk 	totlen -= lastpad;
495*1676Sjpk 	padding -= lastpad;
496*1676Sjpk 
497*1676Sjpk 	/*
498*1676Sjpk 	 * If some sort of inter-option alignment was present, try to preserve
499*1676Sjpk 	 * that alignment.  If alignment pushes us out past the maximum, then
500*1676Sjpk 	 * discard it and try to compress to fit.  (We just "assume" that any
501*1676Sjpk 	 * padding added was attempting to get 32 bit alignment.  If that's
502*1676Sjpk 	 * wrong, that's just too bad.)
503*1676Sjpk 	 */
504*1676Sjpk 	if (padding > 0) {
505*1676Sjpk 		olen = (optbuf[IPOPT_OLEN] + 3) & ~3;
506*1676Sjpk 		if (olen + totlen > IP_MAX_OPT_LENGTH) {
507*1676Sjpk 			totlen -= padding;
508*1676Sjpk 			if (olen + totlen > IP_MAX_OPT_LENGTH)
509*1676Sjpk 				return (-1);
510*1676Sjpk 			padding = 0;
511*1676Sjpk 		}
512*1676Sjpk 	}
513*1676Sjpk 
514*1676Sjpk 	/*
515*1676Sjpk 	 * Since we may need to compress or expand the option list, we write to
516*1676Sjpk 	 * a temporary buffer and then copy the results back to the IP header.
517*1676Sjpk 	 */
518*1676Sjpk 	toptr = tempopt;
519*1676Sjpk 
520*1676Sjpk 	/* compute actual option to insert */
521*1676Sjpk 	olen = optbuf[IPOPT_OLEN];
522*1676Sjpk 	bcopy(optbuf, toptr, olen);
523*1676Sjpk 	toptr += olen;
524*1676Sjpk 	if (padding > 0) {
525*1676Sjpk 		while ((olen & 3) != 0) {
526*1676Sjpk 			*toptr++ = IPOPT_NOP;
527*1676Sjpk 			olen++;
528*1676Sjpk 		}
529*1676Sjpk 	}
530*1676Sjpk 
531*1676Sjpk 	/* copy over the existing options */
532*1676Sjpk 	optr = (uchar_t *)(ipha + 1);
533*1676Sjpk 	while (totlen > 0) {
534*1676Sjpk 		oval = optr[IPOPT_OPTVAL];
535*1676Sjpk 
536*1676Sjpk 		/* totlen doesn't include end-of-list marker */
537*1676Sjpk 		ASSERT(oval != IPOPT_EOL);
538*1676Sjpk 
539*1676Sjpk 		/* handle no-ops; copy if desired, ignore otherwise */
540*1676Sjpk 		if (oval == IPOPT_NOP) {
541*1676Sjpk 			if (padding > 0) {
542*1676Sjpk 				/* note: cannot overflow due to checks above */
543*1676Sjpk 				ASSERT(toptr < tempopt + IP_MAX_OPT_LENGTH);
544*1676Sjpk 				*toptr++ = oval;
545*1676Sjpk 			}
546*1676Sjpk 			optr++;
547*1676Sjpk 			totlen--;
548*1676Sjpk 			continue;
549*1676Sjpk 		}
550*1676Sjpk 
551*1676Sjpk 		/* list cannot be corrupt at this point */
552*1676Sjpk 		ASSERT(totlen >= 2);
553*1676Sjpk 		olen = optr[IPOPT_OLEN];
554*1676Sjpk 		ASSERT(olen >= 2 && olen <= totlen);
555*1676Sjpk 
556*1676Sjpk 		/* cannot run out of room due to tests above */
557*1676Sjpk 		ASSERT(toptr + olen <= tempopt + IP_MAX_OPT_LENGTH);
558*1676Sjpk 
559*1676Sjpk 		bcopy(optr, toptr, olen);
560*1676Sjpk 		optr += olen;
561*1676Sjpk 		toptr += olen;
562*1676Sjpk 		totlen -= olen;
563*1676Sjpk 	}
564*1676Sjpk 
565*1676Sjpk 	/* figure how much padding we'll need for header alignment */
566*1676Sjpk 	olen = (toptr - tempopt) & 3;
567*1676Sjpk 	if (olen > 0) {
568*1676Sjpk 		olen = 4 - olen;
569*1676Sjpk 		ASSERT(toptr + olen <= tempopt + IP_MAX_OPT_LENGTH);
570*1676Sjpk 		/* pad with end-of-list value */
571*1676Sjpk 		bzero(toptr, olen);
572*1676Sjpk 		toptr += olen;
573*1676Sjpk 	}
574*1676Sjpk 
575*1676Sjpk 	/* move the headers as needed and update IP header */
576*1676Sjpk 	olen = (toptr - tempopt) + IP_SIMPLE_HDR_LENGTH;
577*1676Sjpk 	remlen = IPH_HDR_LENGTH(ipha);
578*1676Sjpk 	delta = olen - remlen;
579*1676Sjpk 	if (delta != 0) {
580*1676Sjpk 		ovbcopy((uchar_t *)ipha + remlen, (uchar_t *)ipha + olen,
581*1676Sjpk 		    buflen - remlen);
582*1676Sjpk 		ipha->ipha_version_and_hdr_length += delta / 4;
583*1676Sjpk 	}
584*1676Sjpk 
585*1676Sjpk 	/* slap in the new options */
586*1676Sjpk 	bcopy(tempopt, ipha + 1, olen - IP_SIMPLE_HDR_LENGTH);
587*1676Sjpk 
588*1676Sjpk 	return (delta);
589*1676Sjpk }
590*1676Sjpk 
591*1676Sjpk /*
592*1676Sjpk  * tsol_check_label()
593*1676Sjpk  *
594*1676Sjpk  * This routine computes the IP label that should be on the packet based on the
595*1676Sjpk  * connection and destination information.  If the label is there, it returns
596*1676Sjpk  * zero, so the caller knows that the label is syncronized, and further calls
597*1676Sjpk  * are not required.  If the label isn't right, then the right one is inserted.
598*1676Sjpk  *
599*1676Sjpk  * The packet's header is clear, before entering IPSec's engine.
600*1676Sjpk  *
601*1676Sjpk  * Returns:
602*1676Sjpk  *      0		Label on packet (was|is now) correct
603*1676Sjpk  *      EACCES		The packet failed the remote host accreditation.
604*1676Sjpk  *      ENOMEM		Memory allocation failure.
605*1676Sjpk  *	EINVAL		Label cannot be computed
606*1676Sjpk  */
607*1676Sjpk int
608*1676Sjpk tsol_check_label(const cred_t *credp, mblk_t **mpp, int *addedp,
609*1676Sjpk     boolean_t isexempt)
610*1676Sjpk {
611*1676Sjpk 	mblk_t *mp = *mpp;
612*1676Sjpk 	ipha_t  *ipha;
613*1676Sjpk 	uchar_t opt_storage[IP_MAX_OPT_LENGTH];
614*1676Sjpk 	uint_t hlen;
615*1676Sjpk 	uint_t sec_opt_len;
616*1676Sjpk 	uchar_t *optr;
617*1676Sjpk 	int added;
618*1676Sjpk 	int retv;
619*1676Sjpk 
620*1676Sjpk 	if (addedp != NULL)
621*1676Sjpk 		*addedp = 0;
622*1676Sjpk 
623*1676Sjpk 	opt_storage[IPOPT_OPTVAL] = 0;
624*1676Sjpk 
625*1676Sjpk 	ipha = (ipha_t *)mp->b_rptr;
626*1676Sjpk 
627*1676Sjpk 	retv = tsol_compute_label(credp, ipha->ipha_dst, opt_storage, isexempt);
628*1676Sjpk 	if (retv != 0)
629*1676Sjpk 		return (retv);
630*1676Sjpk 
631*1676Sjpk 	optr = (uchar_t *)(ipha + 1);
632*1676Sjpk 	hlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
633*1676Sjpk 	sec_opt_len = opt_storage[IPOPT_OLEN];
634*1676Sjpk 
635*1676Sjpk 	if (hlen >= sec_opt_len) {
636*1676Sjpk 		/* If no option is supposed to be there, make sure it's not */
637*1676Sjpk 		if (sec_opt_len == 0 && hlen > 0 &&
638*1676Sjpk 		    optr[IPOPT_OPTVAL] != IPOPT_COMSEC &&
639*1676Sjpk 		    optr[IPOPT_OPTVAL] != IPOPT_SECURITY)
640*1676Sjpk 			return (0);
641*1676Sjpk 		/* if the option is there, it's always first */
642*1676Sjpk 		if (sec_opt_len != 0 &&
643*1676Sjpk 		    bcmp(opt_storage, optr, sec_opt_len) == 0)
644*1676Sjpk 			return (0);
645*1676Sjpk 	}
646*1676Sjpk 
647*1676Sjpk 	/*
648*1676Sjpk 	 * If there is an option there, then it must be the wrong one; delete.
649*1676Sjpk 	 */
650*1676Sjpk 	if (hlen > 0)
651*1676Sjpk 		mp->b_wptr += tsol_remove_secopt(ipha, MBLKL(mp));
652*1676Sjpk 
653*1676Sjpk 	/* Make sure we have room for the worst-case addition */
654*1676Sjpk 	hlen = IPH_HDR_LENGTH(ipha) + opt_storage[IPOPT_OLEN];
655*1676Sjpk 	hlen = (hlen + 3) & ~3;
656*1676Sjpk 	if (hlen > IP_MAX_HDR_LENGTH)
657*1676Sjpk 		hlen = IP_MAX_HDR_LENGTH;
658*1676Sjpk 	hlen -= IPH_HDR_LENGTH(ipha);
659*1676Sjpk 	if (mp->b_wptr + hlen > mp->b_datap->db_lim) {
660*1676Sjpk 		int copylen;
661*1676Sjpk 		mblk_t *new_mp;
662*1676Sjpk 
663*1676Sjpk 		/* allocate enough to be meaningful, but not *too* much */
664*1676Sjpk 		copylen = MBLKL(mp);
665*1676Sjpk 		if (copylen > 256)
666*1676Sjpk 			copylen = 256;
667*1676Sjpk 		new_mp = allocb(hlen + copylen +
668*1676Sjpk 		    (mp->b_rptr - mp->b_datap->db_base), BPRI_HI);
669*1676Sjpk 		if (new_mp == NULL)
670*1676Sjpk 			return (ENOMEM);
671*1676Sjpk 		mblk_setcred(new_mp, DB_CRED(mp));
672*1676Sjpk 
673*1676Sjpk 		/* keep the bias */
674*1676Sjpk 		new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
675*1676Sjpk 		new_mp->b_wptr = new_mp->b_rptr + copylen;
676*1676Sjpk 		bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
677*1676Sjpk 		new_mp->b_cont = mp;
678*1676Sjpk 		if ((mp->b_rptr += copylen) >= mp->b_wptr) {
679*1676Sjpk 			new_mp->b_cont = mp->b_cont;
680*1676Sjpk 			freeb(mp);
681*1676Sjpk 		}
682*1676Sjpk 		*mpp = mp = new_mp;
683*1676Sjpk 		ipha = (ipha_t *)mp->b_rptr;
684*1676Sjpk 	}
685*1676Sjpk 
686*1676Sjpk 	added = tsol_prepend_option(opt_storage, ipha, MBLKL(mp));
687*1676Sjpk 	if (added == -1)
688*1676Sjpk 		goto param_prob;
689*1676Sjpk 
690*1676Sjpk 	if (addedp != NULL)
691*1676Sjpk 		*addedp = added;
692*1676Sjpk 
693*1676Sjpk 	ASSERT((mp->b_wptr + added) <= DB_LIM(mp));
694*1676Sjpk 	mp->b_wptr += added;
695*1676Sjpk 
696*1676Sjpk 	return (0);
697*1676Sjpk 
698*1676Sjpk param_prob:
699*1676Sjpk 	return (EINVAL);
700*1676Sjpk }
701*1676Sjpk 
702*1676Sjpk /*
703*1676Sjpk  * IPv6 HopOpt extension header for the label option layout:
704*1676Sjpk  *	- One octet giving the type of the 'next extension header'
705*1676Sjpk  *	- Header extension length in 8-byte words, not including the
706*1676Sjpk  *	  1st 8 bytes, but including any pad bytes at the end.
707*1676Sjpk  *	  Eg. A value of 2 means 16 bytes not including the 1st 8 bytes.
708*1676Sjpk  *	- Followed by TLV encoded IPv6 label option. Option layout is
709*1676Sjpk  *		* One octet, IP6OPT_LS
710*1676Sjpk  *		* One octet option length in bytes of the option data following
711*1676Sjpk  *		  the length, but not including any pad bytes at the end.
712*1676Sjpk  *		* Four-octet DOI (IP6LS_DOI_V4)
713*1676Sjpk  *		* One octet suboption, IP6LS_TT_V4
714*1676Sjpk  *		* One octet suboption length in bytes of the suboption
715*1676Sjpk  *		  following the suboption length, including the suboption
716*1676Sjpk  *		  header length, but not including any pad bytes at the end.
717*1676Sjpk  *	- Pad to make the extension header a multiple of 8 bytes.
718*1676Sjpk  *
719*1676Sjpk  * This function returns the contents of 'IPv6 option structure' in the above.
720*1676Sjpk  * i.e starting from the IP6OPT_LS but not including the pad at the end.
721*1676Sjpk  * The user must prepend two octets (either padding or next header / length)
722*1676Sjpk  * and append padding out to the next 8 octet boundary.
723*1676Sjpk  */
724*1676Sjpk int
725*1676Sjpk tsol_compute_label_v6(const cred_t *credp, const in6_addr_t *dst,
726*1676Sjpk     uchar_t *opt_storage, boolean_t isexempt)
727*1676Sjpk {
728*1676Sjpk 	tsol_tpc_t	*dst_rhtp;
729*1676Sjpk 	ts_label_t	*tsl;
730*1676Sjpk 	uint_t		sec_opt_len;
731*1676Sjpk 	uint32_t	doi;
732*1676Sjpk 	zoneid_t	zoneid;
733*1676Sjpk 	ire_t		*ire, *sire;
734*1676Sjpk 	tsol_ire_gw_secattr_t *attrp;
735*1676Sjpk 	boolean_t	compute_label;
736*1676Sjpk 
737*1676Sjpk 	if (ip6opt_ls == 0)
738*1676Sjpk 		return (EINVAL);
739*1676Sjpk 
740*1676Sjpk 	if (opt_storage != NULL)
741*1676Sjpk 		opt_storage[IPOPT_OLEN] = 0;
742*1676Sjpk 
743*1676Sjpk 	if ((tsl = crgetlabel(credp)) == NULL)
744*1676Sjpk 		return (0);
745*1676Sjpk 
746*1676Sjpk 	/* Always pass multicast */
747*1676Sjpk 	if (IN6_IS_ADDR_MULTICAST(dst))
748*1676Sjpk 		return (0);
749*1676Sjpk 
750*1676Sjpk 	if ((dst_rhtp = find_tpc(dst, IPV6_VERSION, B_FALSE)) == NULL) {
751*1676Sjpk 		DTRACE_PROBE3(tx__tnopt__log__info__labeling__lookupdst__v6,
752*1676Sjpk 		    char *, "destination ip6(1) not in database with creds(2)",
753*1676Sjpk 		    in6_addr_t *, dst, cred_t *, credp);
754*1676Sjpk 		return (EINVAL);
755*1676Sjpk 	}
756*1676Sjpk 
757*1676Sjpk 	zoneid = crgetzoneid(credp);
758*1676Sjpk 
759*1676Sjpk 	/*
760*1676Sjpk 	 * Fill in a V6 label.  If a new format is added here, make certain
761*1676Sjpk 	 * that the maximum size of this label is reflected in sys/tsol/tnet.h
762*1676Sjpk 	 * as TSOL_MAX_IPV6_OPTION.
763*1676Sjpk 	 */
764*1676Sjpk 	compute_label = B_FALSE;
765*1676Sjpk 	switch (dst_rhtp->tpc_tp.host_type) {
766*1676Sjpk 	case UNLABELED:
767*1676Sjpk 		/*
768*1676Sjpk 		 * Only add a label if the unlabeled destination is
769*1676Sjpk 		 * not local or loopback address, that it is
770*1676Sjpk 		 * not on the same subnet, and that the next-hop
771*1676Sjpk 		 * gateway is labeled.
772*1676Sjpk 		 */
773*1676Sjpk 		sire = NULL;
774*1676Sjpk 		ire = ire_cache_lookup_v6(dst, zoneid, tsl);
775*1676Sjpk 
776*1676Sjpk 		if (ire != NULL && (ire->ire_type & (IRE_LOCAL |
777*1676Sjpk 		    IRE_LOOPBACK | IRE_INTERFACE)) != 0) {
778*1676Sjpk 			IRE_REFRELE(ire);
779*1676Sjpk 			TPC_RELE(dst_rhtp);
780*1676Sjpk 			return (0);
781*1676Sjpk 		} else if (ire == NULL) {
782*1676Sjpk 			ire = ire_ftable_lookup_v6(dst, NULL, NULL, 0, NULL,
783*1676Sjpk 			    &sire, zoneid, 0, tsl, (MATCH_IRE_RECURSIVE |
784*1676Sjpk 			    MATCH_IRE_DEFAULT | MATCH_IRE_SECATTR));
785*1676Sjpk 		}
786*1676Sjpk 
787*1676Sjpk 		/* no route to destination */
788*1676Sjpk 		if (ire == NULL) {
789*1676Sjpk 			DTRACE_PROBE4(
790*1676Sjpk 			    tx__tnopt__log__info__labeling__routedst__v6,
791*1676Sjpk 			    char *, "No route to unlabeled dest ip6(1)/tpc(2) "
792*1676Sjpk 			    "with creds(3).", in6_addr_t *, dst, tsol_tpc_t *,
793*1676Sjpk 			    dst_rhtp, cred_t *, credp);
794*1676Sjpk 			TPC_RELE(dst_rhtp);
795*1676Sjpk 			return (EINVAL);
796*1676Sjpk 		}
797*1676Sjpk 
798*1676Sjpk 		/*
799*1676Sjpk 		 * Prefix IRE from f-table lookup means that the destination
800*1676Sjpk 		 * is not directly connected; check the next-hop attributes.
801*1676Sjpk 		 */
802*1676Sjpk 		if (sire != NULL) {
803*1676Sjpk 			ASSERT(ire != NULL);
804*1676Sjpk 			IRE_REFRELE(ire);
805*1676Sjpk 			ire = sire;
806*1676Sjpk 		}
807*1676Sjpk 
808*1676Sjpk 		attrp = ire->ire_gw_secattr;
809*1676Sjpk 		if (attrp != NULL && attrp->igsa_rhc != NULL &&
810*1676Sjpk 		    attrp->igsa_rhc->rhc_tpc->tpc_tp.host_type != UNLABELED)
811*1676Sjpk 			compute_label = B_TRUE;
812*1676Sjpk 
813*1676Sjpk 		if (dst_rhtp->tpc_tp.tp_doi != tsl->tsl_doi ||
814*1676Sjpk 		    (!blequal(&dst_rhtp->tpc_tp.tp_def_label,
815*1676Sjpk 		    &tsl->tsl_label) && (!isexempt ||
816*1676Sjpk 		    (zoneid != GLOBAL_ZONEID && !bldominates(&tsl->tsl_label,
817*1676Sjpk 		    &dst_rhtp->tpc_tp.tp_def_label))))) {
818*1676Sjpk 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__mac__v6,
819*1676Sjpk 			    char *, "unlabeled dest ip6(1)/tpc(2) "
820*1676Sjpk 			    "non-matching creds(3)", in6_addr_t *, dst,
821*1676Sjpk 			    tsol_tpc_t *, dst_rhtp, cred_t *, credp);
822*1676Sjpk 			IRE_REFRELE(ire);
823*1676Sjpk 			TPC_RELE(dst_rhtp);
824*1676Sjpk 			return (EACCES);
825*1676Sjpk 		}
826*1676Sjpk 
827*1676Sjpk 		IRE_REFRELE(ire);
828*1676Sjpk 		break;
829*1676Sjpk 
830*1676Sjpk 	case SUN_CIPSO:
831*1676Sjpk 		if (dst_rhtp->tpc_tp.tp_cipso_doi_cipso != tsl->tsl_doi ||
832*1676Sjpk 		    (!_blinrange(&tsl->tsl_label,
833*1676Sjpk 		    &dst_rhtp->tpc_tp.tp_sl_range_cipso) &&
834*1676Sjpk 		    !blinlset(&tsl->tsl_label,
835*1676Sjpk 		    dst_rhtp->tpc_tp.tp_sl_set_cipso))) {
836*1676Sjpk 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__mac__v6,
837*1676Sjpk 			    char *,
838*1676Sjpk 			    "labeled dest ip6(1)/tpc(2) non-matching creds(3).",
839*1676Sjpk 			    in6_addr_t *, dst, tsol_tpc_t *, dst_rhtp,
840*1676Sjpk 			    cred_t *, credp);
841*1676Sjpk 			TPC_RELE(dst_rhtp);
842*1676Sjpk 			return (EACCES);
843*1676Sjpk 		}
844*1676Sjpk 		compute_label = B_TRUE;
845*1676Sjpk 		break;
846*1676Sjpk 
847*1676Sjpk 	default:
848*1676Sjpk 		TPC_RELE(dst_rhtp);
849*1676Sjpk 		return (EACCES);
850*1676Sjpk 	}
851*1676Sjpk 
852*1676Sjpk 	if (!compute_label) {
853*1676Sjpk 		TPC_RELE(dst_rhtp);
854*1676Sjpk 		return (0);
855*1676Sjpk 	}
856*1676Sjpk 
857*1676Sjpk 	/* compute the CIPSO option */
858*1676Sjpk 	if (opt_storage != NULL)
859*1676Sjpk 		opt_storage += 8;
860*1676Sjpk 	if (dst_rhtp->tpc_tp.host_type != UNLABELED) {
861*1676Sjpk 		sec_opt_len = tsol2cipso_tt1(&tsl->tsl_label, opt_storage,
862*1676Sjpk 		    tsl->tsl_doi);
863*1676Sjpk 	} else {
864*1676Sjpk 		sec_opt_len = tsol2cipso_tt1(&dst_rhtp->tpc_tp.tp_def_label,
865*1676Sjpk 		    opt_storage, tsl->tsl_doi);
866*1676Sjpk 	}
867*1676Sjpk 	TPC_RELE(dst_rhtp);
868*1676Sjpk 
869*1676Sjpk 	if (sec_opt_len == 0) {
870*1676Sjpk 		DTRACE_PROBE4(tx__tnopt__log__error__labeling__lostops__v6,
871*1676Sjpk 		    char *,
872*1676Sjpk 		    "options lack length for dest ip6(1)/tpc(2) with creds(3).",
873*1676Sjpk 		    in6_addr_t *, dst, tsol_tpc_t *, dst_rhtp, cred_t *, credp);
874*1676Sjpk 		return (EINVAL);
875*1676Sjpk 	}
876*1676Sjpk 
877*1676Sjpk 	if (opt_storage == NULL)
878*1676Sjpk 		return (0);
879*1676Sjpk 
880*1676Sjpk 	if (sec_opt_len < IP_MAX_OPT_LENGTH)
881*1676Sjpk 		opt_storage[sec_opt_len] = IPOPT_EOL;
882*1676Sjpk 
883*1676Sjpk 	/*
884*1676Sjpk 	 * Just in case the option length is odd, round it up to the next even
885*1676Sjpk 	 * multiple.  The IPv6 option definition doesn't like odd numbers for
886*1676Sjpk 	 * some reason.
887*1676Sjpk 	 *
888*1676Sjpk 	 * Length in the overall option header (IP6OPT_LS) does not include the
889*1676Sjpk 	 * option header itself, but the length in the suboption does include
890*1676Sjpk 	 * the suboption header.  Thus, when there's just one suboption, the
891*1676Sjpk 	 * length in the option header is the suboption length plus 4 (for the
892*1676Sjpk 	 * DOI value).
893*1676Sjpk 	 */
894*1676Sjpk 	opt_storage[-2] = IP6LS_TT_V4;
895*1676Sjpk 	opt_storage[-1] = (sec_opt_len + 2 + 1) & ~1;
896*1676Sjpk 	opt_storage[-8] = ip6opt_ls;
897*1676Sjpk 	opt_storage[-7] = opt_storage[-1] + 4;
898*1676Sjpk 	doi = htons(IP6LS_DOI_V4);
899*1676Sjpk 	bcopy(&doi, opt_storage - 6, 4);
900*1676Sjpk 
901*1676Sjpk 	return (0);
902*1676Sjpk }
903*1676Sjpk 
904*1676Sjpk /*
905*1676Sjpk  * Locate the start of the IP6OPT_LS label option and return it.
906*1676Sjpk  * Also return the start of the next non-pad option in after_secoptp.
907*1676Sjpk  * Usually the label option is the first option at least when packets
908*1676Sjpk  * are generated, but for generality we don't assume that on received packets.
909*1676Sjpk  */
910*1676Sjpk uchar_t *
911*1676Sjpk tsol_find_secopt_v6(
912*1676Sjpk     const uchar_t *ip6hbh,	/* Start of the hop-by-hop extension header */
913*1676Sjpk     uint_t hbhlen,		/* Length of the hop-by-hop extension header */
914*1676Sjpk     uchar_t **after_secoptp,	/* Non-pad option following the label option */
915*1676Sjpk     boolean_t *hbh_needed)	/* Is hop-by-hop hdr needed w/o label */
916*1676Sjpk {
917*1676Sjpk 	uint_t	optlen;
918*1676Sjpk 	uint_t	optused;
919*1676Sjpk 	const uchar_t *optptr;
920*1676Sjpk 	uchar_t	opt_type;
921*1676Sjpk 	const uchar_t *secopt = NULL;
922*1676Sjpk 
923*1676Sjpk 	*hbh_needed = B_FALSE;
924*1676Sjpk 	*after_secoptp = NULL;
925*1676Sjpk 	optlen = hbhlen - 2;
926*1676Sjpk 	optptr = ip6hbh + 2;
927*1676Sjpk 	while (optlen != 0) {
928*1676Sjpk 		opt_type = *optptr;
929*1676Sjpk 		if (opt_type == IP6OPT_PAD1) {
930*1676Sjpk 			optptr++;
931*1676Sjpk 			optlen--;
932*1676Sjpk 			continue;
933*1676Sjpk 		}
934*1676Sjpk 		if (optlen == 1)
935*1676Sjpk 			break;
936*1676Sjpk 		optused = 2 + optptr[1];
937*1676Sjpk 		if (optused > optlen)
938*1676Sjpk 			break;
939*1676Sjpk 		/*
940*1676Sjpk 		 * if we get here, ip6opt_ls can
941*1676Sjpk 		 * not be 0 because it will always
942*1676Sjpk 		 * match the IP6OPT_PAD1 above.
943*1676Sjpk 		 * Therefore ip6opt_ls == 0 forces
944*1676Sjpk 		 * this test to always fail here.
945*1676Sjpk 		 */
946*1676Sjpk 		if (opt_type == ip6opt_ls)
947*1676Sjpk 			secopt = optptr;
948*1676Sjpk 		else switch (opt_type) {
949*1676Sjpk 		case IP6OPT_PADN:
950*1676Sjpk 			break;
951*1676Sjpk 		default:
952*1676Sjpk 			/*
953*1676Sjpk 			 * There is at least 1 option other than
954*1676Sjpk 			 * the label option. So the hop-by-hop header is needed
955*1676Sjpk 			 */
956*1676Sjpk 			*hbh_needed = B_TRUE;
957*1676Sjpk 			if (secopt != NULL) {
958*1676Sjpk 				*after_secoptp = (uchar_t *)optptr;
959*1676Sjpk 				return ((uchar_t *)secopt);
960*1676Sjpk 			}
961*1676Sjpk 			break;
962*1676Sjpk 		}
963*1676Sjpk 		optlen -= optused;
964*1676Sjpk 		optptr += optused;
965*1676Sjpk 	}
966*1676Sjpk 	return ((uchar_t *)secopt);
967*1676Sjpk }
968*1676Sjpk 
969*1676Sjpk /*
970*1676Sjpk  * Remove the label option from the hop-by-hop options header if it exists.
971*1676Sjpk  * 'buflen' is the total length of the packet typically b_wptr - b_rptr.
972*1676Sjpk  * Header and data following the label option that is deleted are copied
973*1676Sjpk  * (i.e. slid backward) to the right position.
974*1676Sjpk  */
975*1676Sjpk int
976*1676Sjpk tsol_remove_secopt_v6(ip6_t *ip6h, int buflen)
977*1676Sjpk {
978*1676Sjpk 	uchar_t	*ip6hbh;	/* hop-by-hop header */
979*1676Sjpk 	uint_t	hbhlen;		/* hop-by-hop extension header length */
980*1676Sjpk 	uchar_t *secopt = NULL;
981*1676Sjpk 	uchar_t *after_secopt;
982*1676Sjpk 	uint_t	pad;
983*1676Sjpk 	uint_t	delta;
984*1676Sjpk 	boolean_t hbh_needed;
985*1676Sjpk 
986*1676Sjpk 	/*
987*1676Sjpk 	 * hop-by-hop extension header must appear first, if it does not
988*1676Sjpk 	 * exist, there is no label option.
989*1676Sjpk 	 */
990*1676Sjpk 	if (ip6h->ip6_nxt != IPPROTO_HOPOPTS)
991*1676Sjpk 		return (0);
992*1676Sjpk 
993*1676Sjpk 	ip6hbh = (uchar_t *)&ip6h[1];
994*1676Sjpk 	hbhlen = (ip6hbh[1] + 1) << 3;
995*1676Sjpk 	/*
996*1676Sjpk 	 * Locate the start of the label option if it exists and the end
997*1676Sjpk 	 * of the label option including pads if any.
998*1676Sjpk 	 */
999*1676Sjpk 	secopt = tsol_find_secopt_v6(ip6hbh, hbhlen, &after_secopt,
1000*1676Sjpk 	    &hbh_needed);
1001*1676Sjpk 	if (secopt == NULL)
1002*1676Sjpk 		return (0);
1003*1676Sjpk 	if (!hbh_needed) {
1004*1676Sjpk 		uchar_t	next_hdr;
1005*1676Sjpk 		/*
1006*1676Sjpk 		 * The label option was the only option in the hop-by-hop
1007*1676Sjpk 		 * header. We don't need the hop-by-hop header itself any
1008*1676Sjpk 		 * longer.
1009*1676Sjpk 		 */
1010*1676Sjpk 		next_hdr = ip6hbh[0];
1011*1676Sjpk 		ovbcopy(ip6hbh + hbhlen, ip6hbh,
1012*1676Sjpk 		    buflen - (IPV6_HDR_LEN + hbhlen));
1013*1676Sjpk 		ip6h->ip6_plen -= hbhlen;
1014*1676Sjpk 		ip6h->ip6_nxt = next_hdr;
1015*1676Sjpk 		return (hbhlen);
1016*1676Sjpk 	}
1017*1676Sjpk 
1018*1676Sjpk 	if (after_secopt == NULL) {
1019*1676Sjpk 		/* There is no option following the label option */
1020*1676Sjpk 		after_secopt = ip6hbh + hbhlen;
1021*1676Sjpk 	}
1022*1676Sjpk 
1023*1676Sjpk 	/*
1024*1676Sjpk 	 * After deleting the label option, we need to slide the headers
1025*1676Sjpk 	 * and data back, while still maintaining the same alignment (module 8)
1026*1676Sjpk 	 * for the other options. So we slide the headers and data back only
1027*1676Sjpk 	 * by an integral multiple of 8 bytes, and fill the remaining bytes
1028*1676Sjpk 	 * with pads.
1029*1676Sjpk 	 */
1030*1676Sjpk 	delta = after_secopt - secopt;
1031*1676Sjpk 	pad = delta % 8;
1032*1676Sjpk 	if (pad == 1) {
1033*1676Sjpk 		secopt[0] = IP6OPT_PAD1;
1034*1676Sjpk 	} else if (pad > 1) {
1035*1676Sjpk 		secopt[0] = IP6OPT_PADN;
1036*1676Sjpk 		secopt[1] = pad - 2;
1037*1676Sjpk 		if (pad > 2)
1038*1676Sjpk 			bzero(&secopt[2], pad - 2);
1039*1676Sjpk 	}
1040*1676Sjpk 	secopt += pad;
1041*1676Sjpk 	delta -= pad;
1042*1676Sjpk 	ovbcopy(after_secopt, secopt,
1043*1676Sjpk 	    (uchar_t *)ip6h + buflen - after_secopt);
1044*1676Sjpk 	ip6hbh[1] -= delta/8;
1045*1676Sjpk 	ip6h->ip6_plen -= delta;
1046*1676Sjpk 
1047*1676Sjpk 	return (delta);
1048*1676Sjpk }
1049*1676Sjpk 
1050*1676Sjpk /*
1051*1676Sjpk  * 'optbuf' contains a CIPSO label embedded in an IPv6 hop-by-hop option,
1052*1676Sjpk  * starting with the IP6OPT_LS option type. The format of this hop-by-hop
1053*1676Sjpk  * option is described in the block comment above tsol_compute_label_v6.
1054*1676Sjpk  * This function prepends this hop-by-hop option before any other hop-by-hop
1055*1676Sjpk  * options in the hop-by-hop header if one already exists, else a new
1056*1676Sjpk  * hop-by-hop header is created and stuffed into the packet following
1057*1676Sjpk  * the IPv6 header. 'buflen' is the total length of the packet i.e.
1058*1676Sjpk  * b_wptr - b_rptr. The caller ensures that there is enough space for the
1059*1676Sjpk  * extra option being added. Header and data following the position where
1060*1676Sjpk  * the label option is inserted are copied (i.e. slid forward) to the right
1061*1676Sjpk  * position.
1062*1676Sjpk  */
1063*1676Sjpk int
1064*1676Sjpk tsol_prepend_option_v6(uchar_t *optbuf, ip6_t *ip6h, int buflen)
1065*1676Sjpk {
1066*1676Sjpk 	/*
1067*1676Sjpk 	 * rawlen is the length of the label option in bytes, not including
1068*1676Sjpk 	 * any pads, starting from the IP6OPT_LS (option type) byte.
1069*1676Sjpk 	 */
1070*1676Sjpk 	uint_t	rawlen;
1071*1676Sjpk 
1072*1676Sjpk 	uint_t	optlen;		/* rawlen rounded to an 8 byte multiple */
1073*1676Sjpk 	uchar_t	*ip6hbh;	/* start of the hop-by-hop extension header */
1074*1676Sjpk 	uint_t	hbhlen;		/* Length of the hop-by-hop extension header */
1075*1676Sjpk 	uint_t	pad_len;
1076*1676Sjpk 	uchar_t	*pad_position;
1077*1676Sjpk 	int	delta;		/* Actual number of bytes inserted */
1078*1676Sjpk 
1079*1676Sjpk 	rawlen = optbuf[1] + 2;	/* Add 2 for the option type, option length */
1080*1676Sjpk 	ip6hbh = (uchar_t *)&ip6h[1];
1081*1676Sjpk 	if (ip6h->ip6_nxt == IPPROTO_HOPOPTS) {
1082*1676Sjpk 		/*
1083*1676Sjpk 		 * There is a hop-by-hop header present already. In order to
1084*1676Sjpk 		 * preserve the alignment of the other options at the existing
1085*1676Sjpk 		 * value (modulo 8) we need to pad the label option to a
1086*1676Sjpk 		 * multiple of 8 bytes before prepending it to the other
1087*1676Sjpk 		 * options. Slide the extension headers and data forward to
1088*1676Sjpk 		 * accomodate the label option at the start of the hop-by-hop
1089*1676Sjpk 		 * header
1090*1676Sjpk 		 */
1091*1676Sjpk 		delta = optlen = (rawlen + 7) & ~7;
1092*1676Sjpk 		pad_len = optlen - rawlen;
1093*1676Sjpk 		pad_position = ip6hbh + 2 + rawlen;
1094*1676Sjpk 		ovbcopy(ip6hbh + 2, ip6hbh + 2 + optlen,
1095*1676Sjpk 		    buflen - (IPV6_HDR_LEN + 2));
1096*1676Sjpk 		/*
1097*1676Sjpk 		 * Bump up the hop-by-hop extension header length by
1098*1676Sjpk 		 * the number of 8-byte words added
1099*1676Sjpk 		 */
1100*1676Sjpk 		optlen >>= 3;
1101*1676Sjpk 		if (ip6hbh[1] + optlen > 255)
1102*1676Sjpk 			return (-1);
1103*1676Sjpk 		ip6hbh[1] += optlen;
1104*1676Sjpk 	} else {
1105*1676Sjpk 		/*
1106*1676Sjpk 		 * There is no hop-by-hop header in the packet. Construct a
1107*1676Sjpk 		 * new Hop-by-hop extension header (a multiple of 8 bytes).
1108*1676Sjpk 		 * Slide any other extension headers and data forward to
1109*1676Sjpk 		 * accomodate this hop-by-hop header
1110*1676Sjpk 		 */
1111*1676Sjpk 		delta = hbhlen = (2 + rawlen + 7) & ~7; /* +2 for nxthdr, len */
1112*1676Sjpk 		pad_len = hbhlen - (2 + rawlen);
1113*1676Sjpk 		pad_position = ip6hbh + 2 + rawlen;
1114*1676Sjpk 		ovbcopy(ip6hbh, ip6hbh + hbhlen, buflen - IPV6_HDR_LEN);
1115*1676Sjpk 		ip6hbh[0] = ip6h->ip6_nxt;
1116*1676Sjpk 		/*
1117*1676Sjpk 		 * hop-by-hop extension header length in 8-byte words, not
1118*1676Sjpk 		 * including the 1st 8 bytes of the hop-by-hop header.
1119*1676Sjpk 		 */
1120*1676Sjpk 		ip6hbh[1] = (hbhlen >> 3) - 1;
1121*1676Sjpk 		ip6h->ip6_nxt = IPPROTO_HOPOPTS;
1122*1676Sjpk 	}
1123*1676Sjpk 	/*
1124*1676Sjpk 	 * Copy the label option into the hop-by-hop header and insert any
1125*1676Sjpk 	 * needed pads
1126*1676Sjpk 	 */
1127*1676Sjpk 	bcopy(optbuf, ip6hbh + 2, rawlen);
1128*1676Sjpk 	if (pad_len == 1) {
1129*1676Sjpk 		pad_position[0] = IP6OPT_PAD1;
1130*1676Sjpk 	} else if (pad_len > 1) {
1131*1676Sjpk 		pad_position[0] = IP6OPT_PADN;
1132*1676Sjpk 		pad_position[1] = pad_len - 2;
1133*1676Sjpk 		if (pad_len > 2)
1134*1676Sjpk 			bzero(pad_position + 2, pad_len - 2);
1135*1676Sjpk 	}
1136*1676Sjpk 	ip6h->ip6_plen += delta;
1137*1676Sjpk 	return (delta);
1138*1676Sjpk }
1139*1676Sjpk 
1140*1676Sjpk /*
1141*1676Sjpk  * tsol_check_label_v6()
1142*1676Sjpk  *
1143*1676Sjpk  * This routine computes the IP label that should be on the packet based on the
1144*1676Sjpk  * connection and destination information.  It's called only by the IP
1145*1676Sjpk  * forwarding logic, because all internal modules atop IP know how to generate
1146*1676Sjpk  * their own labels.
1147*1676Sjpk  *
1148*1676Sjpk  * Returns:
1149*1676Sjpk  *      0		Label on packet was already correct
1150*1676Sjpk  *      EACCESS		The packet failed the remote host accreditation.
1151*1676Sjpk  *      ENOMEM		Memory allocation failure.
1152*1676Sjpk  */
1153*1676Sjpk int
1154*1676Sjpk tsol_check_label_v6(const cred_t *credp, mblk_t **mpp, int *addedp,
1155*1676Sjpk     boolean_t isexempt)
1156*1676Sjpk {
1157*1676Sjpk 	mblk_t *mp = *mpp;
1158*1676Sjpk 	ip6_t  *ip6h;
1159*1676Sjpk 	/*
1160*1676Sjpk 	 * Label option length is limited to IP_MAX_OPT_LENGTH for
1161*1676Sjpk 	 * symmetry with IPv4. Can be relaxed if needed
1162*1676Sjpk 	 */
1163*1676Sjpk 	uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];
1164*1676Sjpk 	uint_t hlen;
1165*1676Sjpk 	uint_t sec_opt_len; /* label option length not including type, len */
1166*1676Sjpk 	int added;
1167*1676Sjpk 	int retv;
1168*1676Sjpk 	uchar_t	*after_secopt;
1169*1676Sjpk 	uchar_t	*secopt = NULL;
1170*1676Sjpk 	uchar_t	*ip6hbh;
1171*1676Sjpk 	uint_t	hbhlen;
1172*1676Sjpk 	boolean_t hbh_needed;
1173*1676Sjpk 
1174*1676Sjpk 	if (addedp != NULL)
1175*1676Sjpk 		*addedp = 0;
1176*1676Sjpk 
1177*1676Sjpk 	ip6h = (ip6_t *)mp->b_rptr;
1178*1676Sjpk 	retv = tsol_compute_label_v6(credp, &ip6h->ip6_dst, opt_storage,
1179*1676Sjpk 	    isexempt);
1180*1676Sjpk 	if (retv != 0)
1181*1676Sjpk 		return (retv);
1182*1676Sjpk 
1183*1676Sjpk 	sec_opt_len = opt_storage[1];
1184*1676Sjpk 
1185*1676Sjpk 	if (ip6h->ip6_nxt == IPPROTO_HOPOPTS) {
1186*1676Sjpk 		ip6hbh = (uchar_t *)&ip6h[1];
1187*1676Sjpk 		hbhlen = (ip6hbh[1] + 1) << 3;
1188*1676Sjpk 		secopt = tsol_find_secopt_v6(ip6hbh, hbhlen, &after_secopt,
1189*1676Sjpk 		    &hbh_needed);
1190*1676Sjpk 	}
1191*1676Sjpk 
1192*1676Sjpk 	if (sec_opt_len == 0 && secopt == NULL) {
1193*1676Sjpk 		/*
1194*1676Sjpk 		 * The packet is not supposed to have a label, and it
1195*1676Sjpk 		 * does not have one currently
1196*1676Sjpk 		 */
1197*1676Sjpk 		return (0);
1198*1676Sjpk 	}
1199*1676Sjpk 	if (secopt != NULL && sec_opt_len != 0 &&
1200*1676Sjpk 	    (bcmp(opt_storage, secopt, sec_opt_len + 2) == 0)) {
1201*1676Sjpk 		/* The packet has the correct label already */
1202*1676Sjpk 		return (0);
1203*1676Sjpk 	}
1204*1676Sjpk 
1205*1676Sjpk 	/*
1206*1676Sjpk 	 * If there is an option there, then it must be the wrong one; delete.
1207*1676Sjpk 	 */
1208*1676Sjpk 	if (secopt != NULL)
1209*1676Sjpk 		mp->b_wptr += tsol_remove_secopt_v6(ip6h, MBLKL(mp));
1210*1676Sjpk 
1211*1676Sjpk 	/*
1212*1676Sjpk 	 * Make sure we have room for the worst-case addition. Add 2 bytes for
1213*1676Sjpk 	 * the hop-by-hop ext header's next header and length fields. Add
1214*1676Sjpk 	 * another 2 bytes for the label option type, len and then round
1215*1676Sjpk 	 * up to the next 8-byte multiple.
1216*1676Sjpk 	 */
1217*1676Sjpk 	hlen = (4 + sec_opt_len + 7) & ~7;
1218*1676Sjpk 	if (mp->b_wptr + hlen > mp->b_datap->db_lim) {
1219*1676Sjpk 		int copylen;
1220*1676Sjpk 		mblk_t *new_mp;
1221*1676Sjpk 		uint16_t hdr_len;
1222*1676Sjpk 
1223*1676Sjpk 		hdr_len = ip_hdr_length_v6(mp, ip6h);
1224*1676Sjpk 		/*
1225*1676Sjpk 		 * Allocate enough to be meaningful, but not *too* much.
1226*1676Sjpk 		 * Also all the IPv6 extension headers must be in the same mblk
1227*1676Sjpk 		 */
1228*1676Sjpk 		copylen = MBLKL(mp);
1229*1676Sjpk 		if (copylen > 256)
1230*1676Sjpk 			copylen = 256;
1231*1676Sjpk 		if (copylen < hdr_len)
1232*1676Sjpk 			copylen = hdr_len;
1233*1676Sjpk 		new_mp = allocb(hlen + copylen +
1234*1676Sjpk 		    (mp->b_rptr - mp->b_datap->db_base), BPRI_HI);
1235*1676Sjpk 		if (new_mp == NULL)
1236*1676Sjpk 			return (ENOMEM);
1237*1676Sjpk 
1238*1676Sjpk 		/* keep the bias */
1239*1676Sjpk 		new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
1240*1676Sjpk 		new_mp->b_wptr = new_mp->b_rptr + copylen;
1241*1676Sjpk 		bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
1242*1676Sjpk 		new_mp->b_cont = mp;
1243*1676Sjpk 		if ((mp->b_rptr += copylen) >= mp->b_wptr) {
1244*1676Sjpk 			new_mp->b_cont = mp->b_cont;
1245*1676Sjpk 			freeb(mp);
1246*1676Sjpk 		}
1247*1676Sjpk 		*mpp = mp = new_mp;
1248*1676Sjpk 		ip6h = (ip6_t *)mp->b_rptr;
1249*1676Sjpk 	}
1250*1676Sjpk 
1251*1676Sjpk 	added = tsol_prepend_option_v6(opt_storage, ip6h, MBLKL(mp));
1252*1676Sjpk 	if (added == -1)
1253*1676Sjpk 		goto param_prob;
1254*1676Sjpk 
1255*1676Sjpk 	if (addedp != NULL)
1256*1676Sjpk 		*addedp = added;
1257*1676Sjpk 
1258*1676Sjpk 	ASSERT(mp->b_wptr + added <= DB_LIM(mp));
1259*1676Sjpk 	mp->b_wptr += added;
1260*1676Sjpk 
1261*1676Sjpk 	return (0);
1262*1676Sjpk 
1263*1676Sjpk param_prob:
1264*1676Sjpk 	return (EINVAL);
1265*1676Sjpk }
1266*1676Sjpk 
1267*1676Sjpk /*
1268*1676Sjpk  * Update the given IPv6 "sticky options" structure to contain the provided
1269*1676Sjpk  * label, which is encoded as an IPv6 option.  Existing label is removed if
1270*1676Sjpk  * necessary, and storage is allocated/freed/resized.
1271*1676Sjpk  *
1272*1676Sjpk  * Returns 0 on success, errno on failure.
1273*1676Sjpk  */
1274*1676Sjpk int
1275*1676Sjpk tsol_update_sticky(ip6_pkt_t *ipp, uint_t *labellen, const uchar_t *labelopt)
1276*1676Sjpk {
1277*1676Sjpk 	int rawlen, optlen, newlen;
1278*1676Sjpk 	uchar_t *newopts;
1279*1676Sjpk 
1280*1676Sjpk 	/*
1281*1676Sjpk 	 * rawlen is the size of the IPv6 label to be inserted from labelopt.
1282*1676Sjpk 	 * optlen is the total length of that option, including any necessary
1283*1676Sjpk 	 * headers and padding.  newlen is the new size of the total hop-by-hop
1284*1676Sjpk 	 * options buffer, including user options.
1285*1676Sjpk 	 */
1286*1676Sjpk 	if ((rawlen = labelopt[1]) != 0) {
1287*1676Sjpk 		rawlen += 2;	/* add in header size */
1288*1676Sjpk 		optlen = (2 + rawlen + 7) & ~7;
1289*1676Sjpk 	} else {
1290*1676Sjpk 		optlen = 0;
1291*1676Sjpk 	}
1292*1676Sjpk 	newlen = ipp->ipp_hopoptslen + optlen - *labellen;
1293*1676Sjpk 	if (optlen > *labellen) {
1294*1676Sjpk 		if (newlen > IP6_MAX_OPT_LENGTH)
1295*1676Sjpk 			return (EHOSTUNREACH);
1296*1676Sjpk 		/* If the label is bigger than last time, then reallocate */
1297*1676Sjpk 		newopts = kmem_alloc(newlen, KM_NOSLEEP);
1298*1676Sjpk 		if (newopts == NULL)
1299*1676Sjpk 			return (ENOMEM);
1300*1676Sjpk 		/*
1301*1676Sjpk 		 * If the user has hop-by-hop stickyoptions set, then copy his
1302*1676Sjpk 		 * options in after the security label.
1303*1676Sjpk 		 */
1304*1676Sjpk 		if (ipp->ipp_hopoptslen > *labellen) {
1305*1676Sjpk 			bcopy(ipp->ipp_hopopts + *labellen, newopts + optlen,
1306*1676Sjpk 			    ipp->ipp_hopoptslen - *labellen);
1307*1676Sjpk 			/*
1308*1676Sjpk 			 * Stomp out any header gunk here - this was the
1309*1676Sjpk 			 * previous next-header and option length field.
1310*1676Sjpk 			 */
1311*1676Sjpk 			newopts[optlen] = IP6OPT_PADN;
1312*1676Sjpk 			newopts[optlen + 1] = 0;
1313*1676Sjpk 		}
1314*1676Sjpk 		if (ipp->ipp_hopopts != NULL)
1315*1676Sjpk 			kmem_free(ipp->ipp_hopopts, ipp->ipp_hopoptslen);
1316*1676Sjpk 		ipp->ipp_hopopts = (ip6_hbh_t *)newopts;
1317*1676Sjpk 	} else if (optlen < *labellen) {
1318*1676Sjpk 		/* If the label got smaller, then adjust downward. */
1319*1676Sjpk 		if (newlen == 0 && ipp->ipp_hopopts != NULL) {
1320*1676Sjpk 			kmem_free(ipp->ipp_hopopts, ipp->ipp_hopoptslen);
1321*1676Sjpk 			ipp->ipp_hopopts = NULL;
1322*1676Sjpk 			ipp->ipp_fields &= ~IPPF_HOPOPTS;
1323*1676Sjpk 		}
1324*1676Sjpk 		/* If the user still has options, move those back. */
1325*1676Sjpk 		if (ipp->ipp_hopoptslen > *labellen) {
1326*1676Sjpk 			ovbcopy(ipp->ipp_hopopts + *labellen,
1327*1676Sjpk 			    ipp->ipp_hopopts + optlen,
1328*1676Sjpk 			    ipp->ipp_hopoptslen - *labellen);
1329*1676Sjpk 		}
1330*1676Sjpk 	}
1331*1676Sjpk 	ipp->ipp_hopoptslen = newlen;
1332*1676Sjpk 	*labellen = optlen;
1333*1676Sjpk 
1334*1676Sjpk 	newopts = (uchar_t *)ipp->ipp_hopopts;
1335*1676Sjpk 
1336*1676Sjpk 	/* If there are any options, then fix up reported length */
1337*1676Sjpk 	if (newlen > 0) {
1338*1676Sjpk 		newopts[1] = (newlen + 7) / 8 - 1;
1339*1676Sjpk 		ipp->ipp_fields |= IPPF_HOPOPTS;
1340*1676Sjpk 	}
1341*1676Sjpk 
1342*1676Sjpk 	/* If there's a label, then insert it now */
1343*1676Sjpk 	if (optlen > 0) {
1344*1676Sjpk 		/* skip next-header and length fields */
1345*1676Sjpk 		newopts += 2;
1346*1676Sjpk 		bcopy(labelopt, newopts, rawlen);
1347*1676Sjpk 		newopts += rawlen;
1348*1676Sjpk 		/* make sure padding comes out right */
1349*1676Sjpk 		optlen -= 2 + rawlen;
1350*1676Sjpk 		if (optlen == 1) {
1351*1676Sjpk 			newopts[0] = IP6OPT_PAD1;
1352*1676Sjpk 		} else if (optlen > 1) {
1353*1676Sjpk 			newopts[0] = IP6OPT_PADN;
1354*1676Sjpk 			optlen -=  2;
1355*1676Sjpk 			newopts[1] = optlen;
1356*1676Sjpk 			if (optlen > 0)
1357*1676Sjpk 				bzero(newopts + 2, optlen);
1358*1676Sjpk 		}
1359*1676Sjpk 	}
1360*1676Sjpk 	return (0);
1361*1676Sjpk }
1362*1676Sjpk 
1363*1676Sjpk int
1364*1676Sjpk tsol_update_options(uchar_t **opts, uint_t *totlen, uint_t *labellen,
1365*1676Sjpk     const uchar_t *labelopt)
1366*1676Sjpk {
1367*1676Sjpk 	int optlen, newlen;
1368*1676Sjpk 	uchar_t *newopts;
1369*1676Sjpk 
1370*1676Sjpk 	optlen = (labelopt[IPOPT_OLEN] + 3) & ~3;
1371*1676Sjpk 	newlen = *totlen + optlen - *labellen;
1372*1676Sjpk 	if (optlen > *labellen) {
1373*1676Sjpk 		if (newlen > IP_MAX_OPT_LENGTH)
1374*1676Sjpk 			return (EHOSTUNREACH);
1375*1676Sjpk 		newopts = (uchar_t *)mi_alloc(newlen, BPRI_HI);
1376*1676Sjpk 		if (newopts == NULL)
1377*1676Sjpk 			return (ENOMEM);
1378*1676Sjpk 		if (*totlen > *labellen) {
1379*1676Sjpk 			bcopy(*opts + *labellen, newopts + optlen,
1380*1676Sjpk 			    *totlen - *labellen);
1381*1676Sjpk 		}
1382*1676Sjpk 		if (*opts != NULL)
1383*1676Sjpk 			mi_free((char *)*opts);
1384*1676Sjpk 		*opts = newopts;
1385*1676Sjpk 	} else if (optlen < *labellen) {
1386*1676Sjpk 		if (newlen == 0 && *opts != NULL) {
1387*1676Sjpk 			mi_free((char *)*opts);
1388*1676Sjpk 			*opts = NULL;
1389*1676Sjpk 		}
1390*1676Sjpk 		if (*totlen > *labellen) {
1391*1676Sjpk 			ovbcopy(*opts + *labellen, *opts + optlen,
1392*1676Sjpk 			    *totlen - *labellen);
1393*1676Sjpk 		}
1394*1676Sjpk 	}
1395*1676Sjpk 	*totlen = newlen;
1396*1676Sjpk 	*labellen = optlen;
1397*1676Sjpk 	if (optlen > 0) {
1398*1676Sjpk 		newopts = *opts;
1399*1676Sjpk 		bcopy(labelopt, newopts, optlen);
1400*1676Sjpk 		/* check if there are user-supplied options that follow */
1401*1676Sjpk 		if (optlen < newlen) {
1402*1676Sjpk 			/* compute amount of embedded alignment needed */
1403*1676Sjpk 			optlen -= newopts[IPOPT_OLEN];
1404*1676Sjpk 			newopts += newopts[IPOPT_OLEN];
1405*1676Sjpk 			while (--optlen >= 0)
1406*1676Sjpk 				*newopts++ = IPOPT_NOP;
1407*1676Sjpk 		} else if (optlen != newopts[IPOPT_OLEN]) {
1408*1676Sjpk 			/*
1409*1676Sjpk 			 * The label option is the only option and it is
1410*1676Sjpk 			 * not a multiple of 4 bytes.
1411*1676Sjpk 			 */
1412*1676Sjpk 			optlen -= newopts[IPOPT_OLEN];
1413*1676Sjpk 			newopts += newopts[IPOPT_OLEN];
1414*1676Sjpk 			while (--optlen >= 0)
1415*1676Sjpk 				*newopts++ = IPOPT_EOL;
1416*1676Sjpk 		}
1417*1676Sjpk 	}
1418*1676Sjpk 	return (0);
1419*1676Sjpk }
1420*1676Sjpk 
1421*1676Sjpk /*
1422*1676Sjpk  * This does the bulk of the processing for setting IPPROTO_IP {T_,}IP_OPTIONS.
1423*1676Sjpk  */
1424*1676Sjpk boolean_t
1425*1676Sjpk tsol_option_set(uchar_t **opts, uint_t *optlen, uint_t labellen,
1426*1676Sjpk     const uchar_t *useropts, uint_t userlen)
1427*1676Sjpk {
1428*1676Sjpk 	int newlen;
1429*1676Sjpk 	uchar_t *newopts;
1430*1676Sjpk 
1431*1676Sjpk 	newlen = userlen + labellen;
1432*1676Sjpk 	if (newlen > *optlen) {
1433*1676Sjpk 		/* need more room */
1434*1676Sjpk 		newopts = (uchar_t *)mi_alloc(newlen, BPRI_HI);
1435*1676Sjpk 		if (newopts == NULL)
1436*1676Sjpk 			return (ENOMEM);
1437*1676Sjpk 		/*
1438*1676Sjpk 		 * The supplied *opts can't be NULL in this case,
1439*1676Sjpk 		 * since there's an existing label.
1440*1676Sjpk 		 */
1441*1676Sjpk 		if (labellen > 0)
1442*1676Sjpk 			bcopy(*opts, newopts, labellen);
1443*1676Sjpk 		if (*opts != NULL)
1444*1676Sjpk 			mi_free((char *)*opts);
1445*1676Sjpk 		*opts = newopts;
1446*1676Sjpk 	}
1447*1676Sjpk 
1448*1676Sjpk 	if (newlen == 0) {
1449*1676Sjpk 		/* special case -- no remaining IP options at all */
1450*1676Sjpk 		if (*opts != NULL) {
1451*1676Sjpk 			mi_free((char *)*opts);
1452*1676Sjpk 			*opts = NULL;
1453*1676Sjpk 		}
1454*1676Sjpk 	} else if (userlen > 0) {
1455*1676Sjpk 		/* merge in the user's options */
1456*1676Sjpk 		newopts = *opts;
1457*1676Sjpk 		if (labellen > 0) {
1458*1676Sjpk 			int extra = labellen - newopts[IPOPT_OLEN];
1459*1676Sjpk 
1460*1676Sjpk 			newopts += newopts[IPOPT_OLEN];
1461*1676Sjpk 			while (--extra >= 0)
1462*1676Sjpk 				*newopts++ = IPOPT_NOP;
1463*1676Sjpk 		}
1464*1676Sjpk 		bcopy(useropts, newopts, userlen);
1465*1676Sjpk 	}
1466*1676Sjpk 
1467*1676Sjpk 	*optlen = newlen;
1468*1676Sjpk 	return (0);
1469*1676Sjpk }
1470