xref: /onnv-gate/usr/src/uts/common/inet/ip/tn_ipopt.c (revision 9710:d02d4803985d)
11676Sjpk /*
21676Sjpk  * CDDL HEADER START
31676Sjpk  *
41676Sjpk  * The contents of this file are subject to the terms of the
51676Sjpk  * Common Development and Distribution License (the "License").
61676Sjpk  * You may not use this file except in compliance with the License.
71676Sjpk  *
81676Sjpk  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
91676Sjpk  * or http://www.opensolaris.org/os/licensing.
101676Sjpk  * See the License for the specific language governing permissions
111676Sjpk  * and limitations under the License.
121676Sjpk  *
131676Sjpk  * When distributing Covered Code, include this CDDL HEADER in each
141676Sjpk  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
151676Sjpk  * If applicable, add the following below this CDDL HEADER, with the
161676Sjpk  * fields enclosed by brackets "[]" replaced with your own identifying
171676Sjpk  * information: Portions Copyright [yyyy] [name of copyright owner]
181676Sjpk  *
191676Sjpk  * CDDL HEADER END
201676Sjpk  */
211676Sjpk /*
228778SErik.Nordmark@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
231676Sjpk  * Use is subject to license terms.
241676Sjpk  */
251676Sjpk 
261676Sjpk #include <sys/types.h>
271676Sjpk #include <sys/systm.h>
281676Sjpk #include <sys/kmem.h>
291676Sjpk #include <sys/disp.h>
301676Sjpk #include <sys/stream.h>
311676Sjpk #include <sys/strsubr.h>
321676Sjpk #include <sys/strsun.h>
331676Sjpk #include <sys/policy.h>
341676Sjpk #include <sys/tsol/label_macro.h>
351676Sjpk #include <sys/tsol/tndb.h>
361676Sjpk #include <sys/tsol/tnet.h>
371676Sjpk #include <inet/ip.h>
381676Sjpk #include <inet/ip6.h>
391676Sjpk #include <inet/tcp.h>
401676Sjpk #include <inet/ipclassifier.h>
411676Sjpk #include <inet/ip_ire.h>
422535Ssangeeta #include <inet/ip_ftable.h>
431676Sjpk 
441676Sjpk /*
451676Sjpk  * This routine takes a sensitivity label as input and creates a CIPSO
461676Sjpk  * option in the specified buffer.  It returns the size of the CIPSO option.
471676Sjpk  * If the sensitivity label is too large for the CIPSO option, then 0
481676Sjpk  * is returned.
491676Sjpk  *
501676Sjpk  * tsol2cipso_tt1 returns 0 for failure and greater than 0 for success
511676Sjpk  * (more accurately, success means a return value between 10 and 40).
521676Sjpk  */
531676Sjpk 
541676Sjpk static int
551676Sjpk tsol2cipso_tt1(const bslabel_t *sl, unsigned char *cop, uint32_t doi)
561676Sjpk {
571676Sjpk 	struct cipso_tag_type_1 *tt1;
581676Sjpk 	const _bslabel_impl_t *bsl;
591676Sjpk 	const uchar_t *ucp;
601676Sjpk 	int i;
611676Sjpk 
621676Sjpk 	if (doi == 0)
631676Sjpk 		return (0);
641676Sjpk 
651676Sjpk 	/* check for Admin High sensitivity label */
661676Sjpk 	if (blequal(sl, label2bslabel(l_admin_high)))
671676Sjpk 		return (0);
681676Sjpk 
691676Sjpk 	/* check whether classification will fit in one octet */
701676Sjpk 	bsl = (const _bslabel_impl_t *)sl;
711676Sjpk 	if (LCLASS(bsl) & 0xFF00)
721676Sjpk 		return (0);
731676Sjpk 
741676Sjpk 	/*
751676Sjpk 	 * Check whether compartments will fit in 30 octets.
761676Sjpk 	 * Compartments 241 - 256 are not allowed.
771676Sjpk 	 */
781676Sjpk 	if (ntohl(bsl->compartments.c8) & 0x0000FFFF)
791676Sjpk 		return (0);
801676Sjpk 
811676Sjpk 	/*
821676Sjpk 	 * Compute option length and tag length.
831676Sjpk 	 * 'p' points to the last two bytes in the Sensitivity Label's
841676Sjpk 	 * compartments; these cannot be mapped into CIPSO compartments.
851676Sjpk 	 */
861676Sjpk 	ucp = (const uchar_t *)&bsl->compartments.c8 + 2;
871676Sjpk 	while (--ucp >= (const uchar_t *)&bsl->compartments.c1)
881676Sjpk 		if (*ucp != 0)
891676Sjpk 			break;
901676Sjpk 
911676Sjpk 	i =  ucp - (const uchar_t *)&bsl->compartments.c1 + 1;
921676Sjpk 
931676Sjpk 	if (cop == NULL)
941676Sjpk 		return (10 + i);
951676Sjpk 
961676Sjpk 	doi = htonl(doi);
971676Sjpk 	ucp = (const uchar_t *)&doi;
981676Sjpk 	cop[IPOPT_OPTVAL] = IPOPT_COMSEC;
991676Sjpk 	cop[IPOPT_OLEN] = 10 + i;
1001676Sjpk 	cop[IPOPT_OLEN+1] = ucp[0];
1011676Sjpk 	cop[IPOPT_OLEN+2] = ucp[1];
1021676Sjpk 	cop[IPOPT_OLEN+3] = ucp[2];
1031676Sjpk 	cop[IPOPT_OLEN+4] = ucp[3];
1041676Sjpk 	tt1 = (struct cipso_tag_type_1 *)&cop[IPOPT_OLEN + 5];
1051676Sjpk 	tt1->tag_type = 1;
1061676Sjpk 	tt1->tag_align = 0;
1071676Sjpk 	tt1->tag_sl = LCLASS(bsl);
1081676Sjpk 	tt1->tag_length = 4 + i;
1091676Sjpk 
1101676Sjpk 	bcopy(&bsl->compartments.c1, tt1->tag_cat, i);
1111676Sjpk 
1121676Sjpk 	return (cop[IPOPT_OLEN]);
1131676Sjpk }
1141676Sjpk 
1151676Sjpk /*
1161676Sjpk  * The following routine copies a datagram's option into the specified buffer
1171676Sjpk  * (if buffer pointer is non-null), or returns a pointer to the label within
1181676Sjpk  * the streams message (if buffer is null).  In both cases, tsol_get_option
1191676Sjpk  * returns the option's type.
1201676Sjpk  *
1211676Sjpk  * tsol_get_option assumes that the specified buffer is large enough to
1221676Sjpk  * hold the largest valid CIPSO option.  Since the total number of
1231676Sjpk  * IP header options cannot exceed 40 bytes, a 40 byte buffer is a good choice.
1241676Sjpk  */
1251676Sjpk 
1261676Sjpk tsol_ip_label_t
1271676Sjpk tsol_get_option(mblk_t *mp, uchar_t **buffer)
1281676Sjpk {
1291676Sjpk 	ipha_t	*ipha;
1301676Sjpk 	uchar_t	*opt;
1311676Sjpk 	uint32_t	totallen;
1321676Sjpk 	uint32_t	optval;
1331676Sjpk 	uint32_t	optlen;
1341676Sjpk 
1351676Sjpk 	ipha = (ipha_t *)mp->b_rptr;
1361676Sjpk 
1371676Sjpk 	/*
1381676Sjpk 	 * Get length (in 4 byte octets) of IP header options.
1391676Sjpk 	 * If header doesn't contain options, then return OPT_NONE.
1401676Sjpk 	 */
1411676Sjpk 	totallen = ipha->ipha_version_and_hdr_length -
1421676Sjpk 	    (uint8_t)((IP_VERSION << 4) + IP_SIMPLE_HDR_LENGTH_IN_WORDS);
1431676Sjpk 
1441676Sjpk 	if (totallen == 0)
1451676Sjpk 		return (OPT_NONE);
1461676Sjpk 
1471676Sjpk 	totallen <<= 2;
1481676Sjpk 
1491676Sjpk 	/*
1501676Sjpk 	 * Search for CIPSO option.
1511676Sjpk 	 * If no such option is present, then return OPT_NONE.
1521676Sjpk 	 */
1531676Sjpk 	opt = (uchar_t *)&ipha[1];
1541676Sjpk 	while (totallen != 0) {
1551676Sjpk 		switch (optval = opt[IPOPT_OPTVAL]) {
1561676Sjpk 		case IPOPT_EOL:
1571676Sjpk 			return (OPT_NONE);
1581676Sjpk 		case IPOPT_NOP:
1591676Sjpk 			optlen = 1;
1601676Sjpk 			break;
1611676Sjpk 		default:
1621676Sjpk 			if (totallen <= IPOPT_OLEN)
1631676Sjpk 				return (OPT_NONE);
1641676Sjpk 			optlen = opt[IPOPT_OLEN];
1651676Sjpk 			if (optlen < 2)
1661676Sjpk 				return (OPT_NONE);
1671676Sjpk 		}
1681676Sjpk 		if (optlen > totallen)
1691676Sjpk 			return (OPT_NONE);
1701676Sjpk 		/*
1711676Sjpk 		 * Copy pointer to option into '*buffer' and
1721676Sjpk 		 * return the option type.
1731676Sjpk 		 */
1741676Sjpk 		switch (optval) {
1751676Sjpk 		case IPOPT_COMSEC:
1761676Sjpk 			*buffer = opt;
1771676Sjpk 			if (TSOL_CIPSO_TAG_OFFSET < optlen &&
1781676Sjpk 			    opt[TSOL_CIPSO_TAG_OFFSET] == 1)
1791676Sjpk 				return (OPT_CIPSO);
1801676Sjpk 			return (OPT_NONE);
1811676Sjpk 		}
1821676Sjpk 		totallen -= optlen;
1831676Sjpk 		opt += optlen;
1841676Sjpk 	}
1851676Sjpk 	return (OPT_NONE);
1861676Sjpk }
1871676Sjpk 
1881676Sjpk /*
189*9710SKen.Powell@Sun.COM  * tsol_check_dest()
190*9710SKen.Powell@Sun.COM  *
191*9710SKen.Powell@Sun.COM  * This routine verifies if a destination is allowed to recieve messages
192*9710SKen.Powell@Sun.COM  * based on the message cred's security label. If any adjustments to
193*9710SKen.Powell@Sun.COM  * the cred are needed due to the connection's MAC-exempt status or
194*9710SKen.Powell@Sun.COM  * the destination's ability to receive labels, an "effective cred"
195*9710SKen.Powell@Sun.COM  * will be returned.
196*9710SKen.Powell@Sun.COM  *
197*9710SKen.Powell@Sun.COM  * On successful return, effective_cred will point to the new creds needed
198*9710SKen.Powell@Sun.COM  * or will be NULL if new creds aren't needed. On error, effective_cred
199*9710SKen.Powell@Sun.COM  * is NULL.
200*9710SKen.Powell@Sun.COM  *
201*9710SKen.Powell@Sun.COM  * Returns:
202*9710SKen.Powell@Sun.COM  *	0		Have or constructed appropriate credentials
203*9710SKen.Powell@Sun.COM  *	EHOSTUNREACH	The credentials failed the remote host accreditation
204*9710SKen.Powell@Sun.COM  *      ENOMEM		Memory allocation failure
205*9710SKen.Powell@Sun.COM  */
206*9710SKen.Powell@Sun.COM int
207*9710SKen.Powell@Sun.COM tsol_check_dest(const cred_t *credp, const void *dst, uchar_t version,
208*9710SKen.Powell@Sun.COM     boolean_t mac_exempt, cred_t **effective_cred)
209*9710SKen.Powell@Sun.COM {
210*9710SKen.Powell@Sun.COM 	ts_label_t	*tsl, *newtsl = NULL;
211*9710SKen.Powell@Sun.COM 	tsol_tpc_t	*dst_rhtp;
212*9710SKen.Powell@Sun.COM 	zoneid_t	zoneid;
213*9710SKen.Powell@Sun.COM 
214*9710SKen.Powell@Sun.COM 	*effective_cred = NULL;
215*9710SKen.Powell@Sun.COM 	ASSERT(version == IPV4_VERSION ||
216*9710SKen.Powell@Sun.COM 	    (version == IPV6_VERSION &&
217*9710SKen.Powell@Sun.COM 	    !IN6_IS_ADDR_V4MAPPED((in6_addr_t *)dst)));
218*9710SKen.Powell@Sun.COM 
219*9710SKen.Powell@Sun.COM 	/* Always pass kernel level communication (NULL label) */
220*9710SKen.Powell@Sun.COM 	if ((tsl = crgetlabel(credp)) == NULL) {
221*9710SKen.Powell@Sun.COM 		DTRACE_PROBE2(tx__tnopt__log__info__labeling__mac__allownull,
222*9710SKen.Powell@Sun.COM 		    char *, "destination ip(1) with null cred was passed",
223*9710SKen.Powell@Sun.COM 		    ipaddr_t, dst);
224*9710SKen.Powell@Sun.COM 		return (0);
225*9710SKen.Powell@Sun.COM 	}
226*9710SKen.Powell@Sun.COM 
227*9710SKen.Powell@Sun.COM 	/* Always pass multicast */
228*9710SKen.Powell@Sun.COM 	if (version == IPV4_VERSION &&
229*9710SKen.Powell@Sun.COM 	    CLASSD(*(ipaddr_t *)dst)) {
230*9710SKen.Powell@Sun.COM 		DTRACE_PROBE2(tx__tnopt__log__info__labeling__mac__allowmult,
231*9710SKen.Powell@Sun.COM 		    char *, "destination ip(1) with multicast dest was passed",
232*9710SKen.Powell@Sun.COM 		    ipaddr_t, dst);
233*9710SKen.Powell@Sun.COM 		return (0);
234*9710SKen.Powell@Sun.COM 	} else if (version == IPV6_VERSION &&
235*9710SKen.Powell@Sun.COM 	    IN6_IS_ADDR_MULTICAST((in6_addr_t *)dst)) {
236*9710SKen.Powell@Sun.COM 		DTRACE_PROBE2(tx__tnopt__log__info__labeling__mac__allowmult_v6,
237*9710SKen.Powell@Sun.COM 		    char *, "destination ip(1) with multicast dest was passed",
238*9710SKen.Powell@Sun.COM 		    in6_addr_t *, dst);
239*9710SKen.Powell@Sun.COM 		return (0);
240*9710SKen.Powell@Sun.COM 	}
241*9710SKen.Powell@Sun.COM 
242*9710SKen.Powell@Sun.COM 	/* Never pass an undefined destination */
243*9710SKen.Powell@Sun.COM 	if ((dst_rhtp = find_tpc(dst, version, B_FALSE)) == NULL) {
244*9710SKen.Powell@Sun.COM 		DTRACE_PROBE2(tx__tnopt__log__info__labeling__lookupdst,
245*9710SKen.Powell@Sun.COM 		    char *, "destination ip(1) not in tn database.",
246*9710SKen.Powell@Sun.COM 		    void *, dst);
247*9710SKen.Powell@Sun.COM 		return (EHOSTUNREACH);
248*9710SKen.Powell@Sun.COM 	}
249*9710SKen.Powell@Sun.COM 
250*9710SKen.Powell@Sun.COM 	switch (dst_rhtp->tpc_tp.host_type) {
251*9710SKen.Powell@Sun.COM 	case UNLABELED:
252*9710SKen.Powell@Sun.COM 		/*
253*9710SKen.Powell@Sun.COM 		 * Can talk to unlabeled hosts if
254*9710SKen.Powell@Sun.COM 		 * (1) zone's label matches the default label, or
255*9710SKen.Powell@Sun.COM 		 * (2) SO_MAC_EXEMPT is on and we dominate the peer's label
256*9710SKen.Powell@Sun.COM 		 * (3) SO_MAC_EXEMPT is on and this is the global zone
257*9710SKen.Powell@Sun.COM 		 */
258*9710SKen.Powell@Sun.COM 		if (dst_rhtp->tpc_tp.tp_doi != tsl->tsl_doi) {
259*9710SKen.Powell@Sun.COM 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__doi,
260*9710SKen.Powell@Sun.COM 			    char *, "unlabeled dest ip(1)/tpc(2) doi does "
261*9710SKen.Powell@Sun.COM 			    "not match msg label(3) doi.", void *, dst,
262*9710SKen.Powell@Sun.COM 			    tsol_tpc_t *, dst_rhtp, ts_label_t *, tsl);
263*9710SKen.Powell@Sun.COM 			TPC_RELE(dst_rhtp);
264*9710SKen.Powell@Sun.COM 			return (EHOSTUNREACH);
265*9710SKen.Powell@Sun.COM 		}
266*9710SKen.Powell@Sun.COM 		if (!blequal(&dst_rhtp->tpc_tp.tp_def_label,
267*9710SKen.Powell@Sun.COM 		    &tsl->tsl_label)) {
268*9710SKen.Powell@Sun.COM 			zoneid = crgetzoneid(credp);
269*9710SKen.Powell@Sun.COM 			if (!mac_exempt ||
270*9710SKen.Powell@Sun.COM 			    !(zoneid == GLOBAL_ZONEID ||
271*9710SKen.Powell@Sun.COM 			    bldominates(&tsl->tsl_label,
272*9710SKen.Powell@Sun.COM 			    &dst_rhtp->tpc_tp.tp_def_label))) {
273*9710SKen.Powell@Sun.COM 				DTRACE_PROBE4(
274*9710SKen.Powell@Sun.COM 				    tx__tnopt__log__info__labeling__mac,
275*9710SKen.Powell@Sun.COM 				    char *, "unlabeled dest ip(1)/tpc(2) does "
276*9710SKen.Powell@Sun.COM 				    "not match msg label(3).", void *, dst,
277*9710SKen.Powell@Sun.COM 				    tsol_tpc_t *, dst_rhtp, ts_label_t *, tsl);
278*9710SKen.Powell@Sun.COM 				TPC_RELE(dst_rhtp);
279*9710SKen.Powell@Sun.COM 				return (EHOSTUNREACH);
280*9710SKen.Powell@Sun.COM 			}
281*9710SKen.Powell@Sun.COM 			/*
282*9710SKen.Powell@Sun.COM 			 * This is a downlabel MAC-exempt exchange.
283*9710SKen.Powell@Sun.COM 			 * Use the remote destination's default label
284*9710SKen.Powell@Sun.COM 			 * as the label of the message data.
285*9710SKen.Powell@Sun.COM 			 */
286*9710SKen.Powell@Sun.COM 			if ((newtsl = labelalloc(&dst_rhtp->tpc_tp.tp_def_label,
287*9710SKen.Powell@Sun.COM 			    dst_rhtp->tpc_tp.tp_doi, KM_NOSLEEP)) == NULL) {
288*9710SKen.Powell@Sun.COM 				TPC_RELE(dst_rhtp);
289*9710SKen.Powell@Sun.COM 				return (ENOMEM);
290*9710SKen.Powell@Sun.COM 			}
291*9710SKen.Powell@Sun.COM 			newtsl->tsl_flags |= TSLF_UNLABELED;
292*9710SKen.Powell@Sun.COM 
293*9710SKen.Powell@Sun.COM 		} else if (!(tsl->tsl_flags & TSLF_UNLABELED)) {
294*9710SKen.Powell@Sun.COM 			/*
295*9710SKen.Powell@Sun.COM 			 * The security labels are the same but we need
296*9710SKen.Powell@Sun.COM 			 * to flag that the remote node is unlabeled.
297*9710SKen.Powell@Sun.COM 			 */
298*9710SKen.Powell@Sun.COM 			if ((newtsl = labeldup(tsl, KM_NOSLEEP)) == NULL) {
299*9710SKen.Powell@Sun.COM 				TPC_RELE(dst_rhtp);
300*9710SKen.Powell@Sun.COM 				return (ENOMEM);
301*9710SKen.Powell@Sun.COM 			}
302*9710SKen.Powell@Sun.COM 			newtsl->tsl_flags |= TSLF_UNLABELED;
303*9710SKen.Powell@Sun.COM 		}
304*9710SKen.Powell@Sun.COM 		break;
305*9710SKen.Powell@Sun.COM 
306*9710SKen.Powell@Sun.COM 	case SUN_CIPSO:
307*9710SKen.Powell@Sun.COM 		/*
308*9710SKen.Powell@Sun.COM 		 * Can talk to labeled hosts if zone's label is within target's
309*9710SKen.Powell@Sun.COM 		 * label range or set.
310*9710SKen.Powell@Sun.COM 		 */
311*9710SKen.Powell@Sun.COM 		if (dst_rhtp->tpc_tp.tp_cipso_doi_cipso != tsl->tsl_doi ||
312*9710SKen.Powell@Sun.COM 		    (!_blinrange(&tsl->tsl_label,
313*9710SKen.Powell@Sun.COM 		    &dst_rhtp->tpc_tp.tp_sl_range_cipso) &&
314*9710SKen.Powell@Sun.COM 		    !blinlset(&tsl->tsl_label,
315*9710SKen.Powell@Sun.COM 		    dst_rhtp->tpc_tp.tp_sl_set_cipso))) {
316*9710SKen.Powell@Sun.COM 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__mac,
317*9710SKen.Powell@Sun.COM 			    char *, "labeled dest ip(1)/tpc(2) does not "
318*9710SKen.Powell@Sun.COM 			    "match msg label(3).", void *, dst,
319*9710SKen.Powell@Sun.COM 			    tsol_tpc_t *, dst_rhtp, ts_label_t *, tsl);
320*9710SKen.Powell@Sun.COM 			TPC_RELE(dst_rhtp);
321*9710SKen.Powell@Sun.COM 			return (EHOSTUNREACH);
322*9710SKen.Powell@Sun.COM 		}
323*9710SKen.Powell@Sun.COM 		if (tsl->tsl_flags & TSLF_UNLABELED) {
324*9710SKen.Powell@Sun.COM 			/*
325*9710SKen.Powell@Sun.COM 			 * The security label is a match but we need to
326*9710SKen.Powell@Sun.COM 			 * clear the unlabeled flag for this remote node.
327*9710SKen.Powell@Sun.COM 			 */
328*9710SKen.Powell@Sun.COM 			if ((newtsl = labeldup(tsl, KM_NOSLEEP)) == NULL) {
329*9710SKen.Powell@Sun.COM 				TPC_RELE(dst_rhtp);
330*9710SKen.Powell@Sun.COM 				return (ENOMEM);
331*9710SKen.Powell@Sun.COM 			}
332*9710SKen.Powell@Sun.COM 			newtsl->tsl_flags ^= TSLF_UNLABELED;
333*9710SKen.Powell@Sun.COM 		}
334*9710SKen.Powell@Sun.COM 		break;
335*9710SKen.Powell@Sun.COM 
336*9710SKen.Powell@Sun.COM 	default:
337*9710SKen.Powell@Sun.COM 		TPC_RELE(dst_rhtp);
338*9710SKen.Powell@Sun.COM 		return (EHOSTUNREACH);
339*9710SKen.Powell@Sun.COM 	}
340*9710SKen.Powell@Sun.COM 
341*9710SKen.Powell@Sun.COM 	/*
342*9710SKen.Powell@Sun.COM 	 * Generate a new cred if we modified the security label or
343*9710SKen.Powell@Sun.COM 	 * label flags.
344*9710SKen.Powell@Sun.COM 	 */
345*9710SKen.Powell@Sun.COM 	if (newtsl != NULL) {
346*9710SKen.Powell@Sun.COM 		*effective_cred = copycred_from_tslabel(credp,
347*9710SKen.Powell@Sun.COM 		    newtsl, KM_NOSLEEP);
348*9710SKen.Powell@Sun.COM 		label_rele(newtsl);
349*9710SKen.Powell@Sun.COM 		if (*effective_cred == NULL) {
350*9710SKen.Powell@Sun.COM 			TPC_RELE(dst_rhtp);
351*9710SKen.Powell@Sun.COM 			return (ENOMEM);
352*9710SKen.Powell@Sun.COM 		}
353*9710SKen.Powell@Sun.COM 	}
354*9710SKen.Powell@Sun.COM 	TPC_RELE(dst_rhtp);
355*9710SKen.Powell@Sun.COM 	return (0);
356*9710SKen.Powell@Sun.COM }
357*9710SKen.Powell@Sun.COM 
358*9710SKen.Powell@Sun.COM /*
3591676Sjpk  * tsol_compute_label()
3601676Sjpk  *
3611676Sjpk  * This routine computes the IP label that should be on a packet based on the
3621676Sjpk  * connection and destination information.
3631676Sjpk  *
3641676Sjpk  * Returns:
3651676Sjpk  *      0		Fetched label
366*9710SKen.Powell@Sun.COM  *	EHOSTUNREACH	No route to destination
3671676Sjpk  *	EINVAL		Label cannot be computed
3681676Sjpk  */
3691676Sjpk int
3701676Sjpk tsol_compute_label(const cred_t *credp, ipaddr_t dst, uchar_t *opt_storage,
371*9710SKen.Powell@Sun.COM     ip_stack_t *ipst)
3721676Sjpk {
3731676Sjpk 	uint_t		sec_opt_len;
3741676Sjpk 	ts_label_t	*tsl;
3751676Sjpk 	ire_t		*ire, *sire = NULL;
3761676Sjpk 	tsol_ire_gw_secattr_t *attrp;
3773448Sdh155122 	zoneid_t	zoneid, ip_zoneid;
3781676Sjpk 
3798778SErik.Nordmark@Sun.COM 	ASSERT(credp != NULL);
3808778SErik.Nordmark@Sun.COM 
3811676Sjpk 	if (opt_storage != NULL)
3821676Sjpk 		opt_storage[IPOPT_OLEN] = 0;
3831676Sjpk 
3841676Sjpk 	if ((tsl = crgetlabel(credp)) == NULL)
3851676Sjpk 		return (0);
3861676Sjpk 
3871676Sjpk 	/* always pass multicast */
3881676Sjpk 	if (CLASSD(dst))
3891676Sjpk 		return (0);
3901676Sjpk 
391*9710SKen.Powell@Sun.COM 	if (tsl->tsl_flags & TSLF_UNLABELED) {
3921676Sjpk 
393*9710SKen.Powell@Sun.COM 		/*
394*9710SKen.Powell@Sun.COM 		 * The destination is unlabeled. Only add a label if the
395*9710SKen.Powell@Sun.COM 		 * destination is not a broadcast/local/loopback address,
396*9710SKen.Powell@Sun.COM 		 * the destination is not on the same subnet, and the
397*9710SKen.Powell@Sun.COM 		 * next-hop gateway is labeled.
398*9710SKen.Powell@Sun.COM 		 *
399*9710SKen.Powell@Sun.COM 		 * For exclusive stacks we set the zoneid to zero
400*9710SKen.Powell@Sun.COM 		 * to operate as if we are in the global zone for
401*9710SKen.Powell@Sun.COM 		 * IRE lookups.
402*9710SKen.Powell@Sun.COM 		 */
403*9710SKen.Powell@Sun.COM 		zoneid = crgetzoneid(credp);
404*9710SKen.Powell@Sun.COM 		if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
405*9710SKen.Powell@Sun.COM 			ip_zoneid = GLOBAL_ZONEID;
406*9710SKen.Powell@Sun.COM 		else
407*9710SKen.Powell@Sun.COM 			ip_zoneid = zoneid;
4083448Sdh155122 
4093448Sdh155122 		ire = ire_cache_lookup(dst, ip_zoneid, tsl, ipst);
4101676Sjpk 
4111676Sjpk 		if (ire != NULL && (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL |
4121676Sjpk 		    IRE_LOOPBACK | IRE_INTERFACE)) != 0) {
4131676Sjpk 			IRE_REFRELE(ire);
4141676Sjpk 			return (0);
4151676Sjpk 		} else if (ire == NULL) {
4161676Sjpk 			ire = ire_ftable_lookup(dst, 0, 0, 0, NULL, &sire,
4173448Sdh155122 			    ip_zoneid, 0, tsl, (MATCH_IRE_RECURSIVE |
4186596Skp158701 			    MATCH_IRE_DEFAULT | MATCH_IRE_SECATTR), ipst);
4191676Sjpk 		}
4201676Sjpk 
4211676Sjpk 		/* no route to destination */
4221676Sjpk 		if (ire == NULL) {
423*9710SKen.Powell@Sun.COM 			DTRACE_PROBE3(
4241676Sjpk 			    tx__tnopt__log__info__labeling__routedst__v4,
425*9710SKen.Powell@Sun.COM 			    char *, "No route to unlabeled dest ip(1) with "
426*9710SKen.Powell@Sun.COM 			    "creds(2).", ipaddr_t, dst, cred_t *, credp);
427*9710SKen.Powell@Sun.COM 			return (EHOSTUNREACH);
4281676Sjpk 		}
4291676Sjpk 
4301676Sjpk 		/*
4311676Sjpk 		 * Prefix IRE from f-table lookup means that the destination
4321676Sjpk 		 * is not directly connected; check the next-hop attributes.
4331676Sjpk 		 */
4341676Sjpk 		if (sire != NULL) {
4351676Sjpk 			ASSERT(ire != NULL);
4361676Sjpk 			IRE_REFRELE(ire);
4371676Sjpk 			ire = sire;
4381676Sjpk 		}
4391676Sjpk 
4401676Sjpk 		/*
441*9710SKen.Powell@Sun.COM 		 * Return now if next hop gateway is unlabeled. There is
442*9710SKen.Powell@Sun.COM 		 * no need to generate a CIPSO option for this message.
4431676Sjpk 		 */
444*9710SKen.Powell@Sun.COM 		attrp = ire->ire_gw_secattr;
445*9710SKen.Powell@Sun.COM 		if (attrp == NULL || attrp->igsa_rhc == NULL ||
446*9710SKen.Powell@Sun.COM 		    attrp->igsa_rhc->rhc_tpc->tpc_tp.host_type == UNLABELED) {
4471676Sjpk 			IRE_REFRELE(ire);
448*9710SKen.Powell@Sun.COM 			return (0);
4491676Sjpk 		}
4501676Sjpk 
4511676Sjpk 		IRE_REFRELE(ire);
4521676Sjpk 
4531676Sjpk 	}
4541676Sjpk 
4551676Sjpk 	/* compute the CIPSO option */
456*9710SKen.Powell@Sun.COM 	sec_opt_len = tsol2cipso_tt1(&tsl->tsl_label, opt_storage,
457*9710SKen.Powell@Sun.COM 	    tsl->tsl_doi);
4581676Sjpk 
4591676Sjpk 	if (sec_opt_len == 0) {
460*9710SKen.Powell@Sun.COM 		DTRACE_PROBE3(tx__tnopt__log__error__labeling__lostops__v4,
461*9710SKen.Powell@Sun.COM 		    char *, "options lack length for dest ip(1) with creds(2).",
462*9710SKen.Powell@Sun.COM 		    ipaddr_t, dst, cred_t *, credp);
4631676Sjpk 		return (EINVAL);
4641676Sjpk 	}
4651676Sjpk 
4661676Sjpk 	return (0);
4671676Sjpk }
4681676Sjpk 
4691676Sjpk /*
4701676Sjpk  * Remove any existing security option (CIPSO) from the given IP
4711676Sjpk  * header, move the 'buflen' bytes back to fill the gap, and return the number
4721676Sjpk  * of bytes removed (as zero or negative number).  Assumes that the headers are
4731676Sjpk  * sane.
4741676Sjpk  */
4751676Sjpk int
4761676Sjpk tsol_remove_secopt(ipha_t *ipha, int buflen)
4771676Sjpk {
4781676Sjpk 	int remlen, olen, oval, delta;
4791676Sjpk 	uchar_t *fptr, *tptr;
4801676Sjpk 	boolean_t noop_keep;
4811676Sjpk 
4821676Sjpk 	remlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
4831676Sjpk 	fptr = tptr = (uchar_t *)(ipha + 1);
4841676Sjpk 	noop_keep = B_TRUE;
4851676Sjpk 	while (remlen > 0) {
4861676Sjpk 		oval = fptr[IPOPT_OPTVAL];
4871676Sjpk 
4881676Sjpk 		/* terminate on end of list */
4891676Sjpk 		if (oval == IPOPT_EOL)
4901676Sjpk 			break;
4911676Sjpk 
4921676Sjpk 		/*
4931676Sjpk 		 * Delete any no-ops following a deleted option, at least up
4941676Sjpk 		 * to a 4 octet alignment; copy others.
4951676Sjpk 		 */
4961676Sjpk 		if (oval == IPOPT_NOP) {
4971676Sjpk 			if (((fptr - (uchar_t *)ipha) & 3) == 0)
4981676Sjpk 				noop_keep = B_TRUE;
4991676Sjpk 			if (noop_keep)
5001676Sjpk 				*tptr++ = oval;
5011676Sjpk 			fptr++;
5021676Sjpk 			remlen--;
5031676Sjpk 			continue;
5041676Sjpk 		}
5051676Sjpk 
5061676Sjpk 		/* stop on corrupted list; just do nothing. */
5071676Sjpk 		if (remlen < 2)
5081676Sjpk 			return (0);
5091676Sjpk 		olen = fptr[IPOPT_OLEN];
5101676Sjpk 		if (olen < 2 || olen > remlen)
5111676Sjpk 			return (0);
5121676Sjpk 
5131676Sjpk 		/* skip over security options to delete them */
5141676Sjpk 		if (oval == IPOPT_COMSEC || oval == IPOPT_SECURITY) {
5151676Sjpk 			noop_keep = B_FALSE;
5161676Sjpk 			fptr += olen;
5171676Sjpk 			remlen -= olen;
5181676Sjpk 			continue;
5191676Sjpk 		}
5201676Sjpk 
5211676Sjpk 		/* copy the rest */
5221676Sjpk 		noop_keep = B_TRUE;
5231676Sjpk 		if (tptr != fptr)
5241676Sjpk 			ovbcopy(fptr, tptr, olen);
5251676Sjpk 		fptr += olen;
5261676Sjpk 		tptr += olen;
5271676Sjpk 		remlen -= olen;
5281676Sjpk 	}
5291676Sjpk 
5301676Sjpk 	fptr += remlen;
5311676Sjpk 
5321676Sjpk 	/* figure how much padding we'll need for header alignment */
5331676Sjpk 	olen = (tptr - (uchar_t *)ipha) & 3;
5341676Sjpk 	if (olen > 0) {
5351676Sjpk 		olen = 4 - olen;
5361676Sjpk 		/* pad with end-of-list */
5371676Sjpk 		bzero(tptr, olen);
5381676Sjpk 		tptr += olen;
5391676Sjpk 	}
5401676Sjpk 
5411676Sjpk 	/* slide back the headers that follow and update the IP header */
5421676Sjpk 	delta = fptr - tptr;
5431676Sjpk 	if (delta != 0) {
5441676Sjpk 		ovbcopy(fptr, tptr, ((uchar_t *)ipha + buflen) - fptr);
5451676Sjpk 		ipha->ipha_version_and_hdr_length -= delta / 4;
5461676Sjpk 	}
5471676Sjpk 	return (-delta);
5481676Sjpk }
5491676Sjpk 
5501676Sjpk /*
5511676Sjpk  * Insert the option in 'optbuf' into the IP header pointed to by 'ipha', and
5521676Sjpk  * move the data following the IP header (up to buflen) to accomodate the new
5531676Sjpk  * option.  Assumes that up to IP_MAX_OPT_LENGTH bytes are available (in total)
5541676Sjpk  * for IP options.  Returns the number of bytes actually inserted, or -1 if the
5551676Sjpk  * option cannot be inserted.  (Note that negative return values are possible
5561676Sjpk  * when noops must be compressed, and that only -1 indicates error.  Successful
5571676Sjpk  * return value is always evenly divisible by 4, by definition.)
5581676Sjpk  */
5591676Sjpk int
5601676Sjpk tsol_prepend_option(uchar_t *optbuf, ipha_t *ipha, int buflen)
5611676Sjpk {
5621676Sjpk 	int remlen, padding, lastpad, totlen;
5631676Sjpk 	int oval, olen;
5641676Sjpk 	int delta;
5651676Sjpk 	uchar_t *optr;
5661676Sjpk 	uchar_t tempopt[IP_MAX_OPT_LENGTH], *toptr;
5671676Sjpk 
5681676Sjpk 	if (optbuf[IPOPT_OPTVAL] == IPOPT_EOL ||
5691676Sjpk 	    optbuf[IPOPT_OPTVAL] == IPOPT_NOP ||
5701676Sjpk 	    optbuf[IPOPT_OLEN] == 0)
5711676Sjpk 		return (0);
5721676Sjpk 
5731676Sjpk 	ASSERT(optbuf[IPOPT_OLEN] >= 2 &&
5741676Sjpk 	    optbuf[IPOPT_OLEN] <= IP_MAX_OPT_LENGTH);
5751676Sjpk 
5761676Sjpk 	/* first find the real (unpadded) length of the existing options */
5771676Sjpk 	remlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
5781676Sjpk 	padding = totlen = lastpad = 0;
5791676Sjpk 	optr = (uchar_t *)(ipha + 1);
5801676Sjpk 	while (remlen > 0) {
5811676Sjpk 		oval = optr[IPOPT_OPTVAL];
5821676Sjpk 
5831676Sjpk 		/* stop at end of list */
5841676Sjpk 		if (oval == IPOPT_EOL)
5851676Sjpk 			break;
5861676Sjpk 
5871676Sjpk 		/* skip no-ops, noting that length byte isn't present */
5881676Sjpk 		if (oval == IPOPT_NOP) {
5891676Sjpk 			optr++;
5901676Sjpk 			padding++;
5911676Sjpk 			lastpad++;
5921676Sjpk 			totlen++;
5931676Sjpk 			remlen--;
5941676Sjpk 			continue;
5951676Sjpk 		}
5961676Sjpk 
5971676Sjpk 		/* give up on a corrupted list; report failure */
5981676Sjpk 		if (remlen < 2)
5991676Sjpk 			return (-1);
6001676Sjpk 		olen = optr[IPOPT_OLEN];
6011676Sjpk 		if (olen < 2 || olen > remlen)
6021676Sjpk 			return (-1);
6031676Sjpk 
6041676Sjpk 		lastpad = 0;
6051676Sjpk 		optr += olen;
6061676Sjpk 		totlen += olen;
6071676Sjpk 		remlen -= olen;
6081676Sjpk 	}
6091676Sjpk 
6101676Sjpk 	/* completely ignore any trailing padding */
6111676Sjpk 	totlen -= lastpad;
6121676Sjpk 	padding -= lastpad;
6131676Sjpk 
6141676Sjpk 	/*
6151676Sjpk 	 * If some sort of inter-option alignment was present, try to preserve
6161676Sjpk 	 * that alignment.  If alignment pushes us out past the maximum, then
6171676Sjpk 	 * discard it and try to compress to fit.  (We just "assume" that any
6181676Sjpk 	 * padding added was attempting to get 32 bit alignment.  If that's
6191676Sjpk 	 * wrong, that's just too bad.)
6201676Sjpk 	 */
6211676Sjpk 	if (padding > 0) {
6221676Sjpk 		olen = (optbuf[IPOPT_OLEN] + 3) & ~3;
6231676Sjpk 		if (olen + totlen > IP_MAX_OPT_LENGTH) {
6241676Sjpk 			totlen -= padding;
6251676Sjpk 			if (olen + totlen > IP_MAX_OPT_LENGTH)
6261676Sjpk 				return (-1);
6271676Sjpk 			padding = 0;
6281676Sjpk 		}
6291676Sjpk 	}
6301676Sjpk 
6311676Sjpk 	/*
6321676Sjpk 	 * Since we may need to compress or expand the option list, we write to
6331676Sjpk 	 * a temporary buffer and then copy the results back to the IP header.
6341676Sjpk 	 */
6351676Sjpk 	toptr = tempopt;
6361676Sjpk 
6371676Sjpk 	/* compute actual option to insert */
6381676Sjpk 	olen = optbuf[IPOPT_OLEN];
6391676Sjpk 	bcopy(optbuf, toptr, olen);
6401676Sjpk 	toptr += olen;
6411676Sjpk 	if (padding > 0) {
6421676Sjpk 		while ((olen & 3) != 0) {
6431676Sjpk 			*toptr++ = IPOPT_NOP;
6441676Sjpk 			olen++;
6451676Sjpk 		}
6461676Sjpk 	}
6471676Sjpk 
6481676Sjpk 	/* copy over the existing options */
6491676Sjpk 	optr = (uchar_t *)(ipha + 1);
6501676Sjpk 	while (totlen > 0) {
6511676Sjpk 		oval = optr[IPOPT_OPTVAL];
6521676Sjpk 
6531676Sjpk 		/* totlen doesn't include end-of-list marker */
6541676Sjpk 		ASSERT(oval != IPOPT_EOL);
6551676Sjpk 
6561676Sjpk 		/* handle no-ops; copy if desired, ignore otherwise */
6571676Sjpk 		if (oval == IPOPT_NOP) {
6581676Sjpk 			if (padding > 0) {
6591676Sjpk 				/* note: cannot overflow due to checks above */
6601676Sjpk 				ASSERT(toptr < tempopt + IP_MAX_OPT_LENGTH);
6611676Sjpk 				*toptr++ = oval;
6621676Sjpk 			}
6631676Sjpk 			optr++;
6641676Sjpk 			totlen--;
6651676Sjpk 			continue;
6661676Sjpk 		}
6671676Sjpk 
6681676Sjpk 		/* list cannot be corrupt at this point */
6691676Sjpk 		ASSERT(totlen >= 2);
6701676Sjpk 		olen = optr[IPOPT_OLEN];
6711676Sjpk 		ASSERT(olen >= 2 && olen <= totlen);
6721676Sjpk 
6731676Sjpk 		/* cannot run out of room due to tests above */
6741676Sjpk 		ASSERT(toptr + olen <= tempopt + IP_MAX_OPT_LENGTH);
6751676Sjpk 
6761676Sjpk 		bcopy(optr, toptr, olen);
6771676Sjpk 		optr += olen;
6781676Sjpk 		toptr += olen;
6791676Sjpk 		totlen -= olen;
6801676Sjpk 	}
6811676Sjpk 
6821676Sjpk 	/* figure how much padding we'll need for header alignment */
6831676Sjpk 	olen = (toptr - tempopt) & 3;
6841676Sjpk 	if (olen > 0) {
6851676Sjpk 		olen = 4 - olen;
6861676Sjpk 		ASSERT(toptr + olen <= tempopt + IP_MAX_OPT_LENGTH);
6871676Sjpk 		/* pad with end-of-list value */
6881676Sjpk 		bzero(toptr, olen);
6891676Sjpk 		toptr += olen;
6901676Sjpk 	}
6911676Sjpk 
6921676Sjpk 	/* move the headers as needed and update IP header */
6931676Sjpk 	olen = (toptr - tempopt) + IP_SIMPLE_HDR_LENGTH;
6941676Sjpk 	remlen = IPH_HDR_LENGTH(ipha);
6951676Sjpk 	delta = olen - remlen;
6961676Sjpk 	if (delta != 0) {
6971676Sjpk 		ovbcopy((uchar_t *)ipha + remlen, (uchar_t *)ipha + olen,
6981676Sjpk 		    buflen - remlen);
6991676Sjpk 		ipha->ipha_version_and_hdr_length += delta / 4;
7001676Sjpk 	}
7011676Sjpk 
7021676Sjpk 	/* slap in the new options */
7031676Sjpk 	bcopy(tempopt, ipha + 1, olen - IP_SIMPLE_HDR_LENGTH);
7041676Sjpk 
7051676Sjpk 	return (delta);
7061676Sjpk }
7071676Sjpk 
7081676Sjpk /*
7091676Sjpk  * tsol_check_label()
7101676Sjpk  *
7111676Sjpk  * This routine computes the IP label that should be on the packet based on the
7121676Sjpk  * connection and destination information.  If the label is there, it returns
7131676Sjpk  * zero, so the caller knows that the label is synchronized, and further calls
7141676Sjpk  * are not required.  If the label isn't right, then the right one is inserted.
7151676Sjpk  *
7166596Skp158701  * The packet's header is clear before entering IPsec's engine.
7171676Sjpk  *
7181676Sjpk  * Returns:
7191676Sjpk  *      0		Label on packet (was|is now) correct
7201676Sjpk  *      EACCES		The packet failed the remote host accreditation.
7211676Sjpk  *      ENOMEM		Memory allocation failure.
7221676Sjpk  *	EINVAL		Label cannot be computed
7231676Sjpk  */
7241676Sjpk int
7256596Skp158701 tsol_check_label(const cred_t *credp, mblk_t **mpp, boolean_t isexempt,
726*9710SKen.Powell@Sun.COM     ip_stack_t *ipst, pid_t pid)
7271676Sjpk {
7281676Sjpk 	mblk_t *mp = *mpp;
7291676Sjpk 	ipha_t  *ipha;
730*9710SKen.Powell@Sun.COM 	cred_t *effective_cred = NULL;
7311676Sjpk 	uchar_t opt_storage[IP_MAX_OPT_LENGTH];
7321676Sjpk 	uint_t hlen;
7331676Sjpk 	uint_t sec_opt_len;
7341676Sjpk 	uchar_t *optr;
7356596Skp158701 	int delta_remove = 0, delta_add, adjust;
7361676Sjpk 	int retv;
7371676Sjpk 
7381676Sjpk 	opt_storage[IPOPT_OPTVAL] = 0;
7391676Sjpk 
7401676Sjpk 	ipha = (ipha_t *)mp->b_rptr;
7411676Sjpk 
742*9710SKen.Powell@Sun.COM 	/*
743*9710SKen.Powell@Sun.COM 	 * Verify the destination is allowed to receive packets at
744*9710SKen.Powell@Sun.COM 	 * the security label of the message data. check_dest()
745*9710SKen.Powell@Sun.COM 	 * may create a new effective cred with a modified label
746*9710SKen.Powell@Sun.COM 	 * or label flags. Apply any such cred to the message block
747*9710SKen.Powell@Sun.COM 	 * for use in future routing decisions.
748*9710SKen.Powell@Sun.COM 	 */
749*9710SKen.Powell@Sun.COM 	retv = tsol_check_dest(credp, &ipha->ipha_dst, IPV4_VERSION,
750*9710SKen.Powell@Sun.COM 	    isexempt, &effective_cred);
7511676Sjpk 	if (retv != 0)
7521676Sjpk 		return (retv);
7531676Sjpk 
754*9710SKen.Powell@Sun.COM 	/*
755*9710SKen.Powell@Sun.COM 	 * Calculate the security label to be placed in the text
756*9710SKen.Powell@Sun.COM 	 * of the message (if any).
757*9710SKen.Powell@Sun.COM 	 */
758*9710SKen.Powell@Sun.COM 	if (effective_cred != NULL) {
759*9710SKen.Powell@Sun.COM 		if ((retv = tsol_compute_label(effective_cred,
760*9710SKen.Powell@Sun.COM 		    ipha->ipha_dst, opt_storage, ipst)) != 0) {
761*9710SKen.Powell@Sun.COM 			crfree(effective_cred);
762*9710SKen.Powell@Sun.COM 			return (retv);
763*9710SKen.Powell@Sun.COM 		}
764*9710SKen.Powell@Sun.COM 		mblk_setcred(mp, effective_cred, pid);
765*9710SKen.Powell@Sun.COM 		crfree(effective_cred);
766*9710SKen.Powell@Sun.COM 	} else {
767*9710SKen.Powell@Sun.COM 		if ((retv = tsol_compute_label(credp,
768*9710SKen.Powell@Sun.COM 		    ipha->ipha_dst, opt_storage, ipst)) != 0) {
769*9710SKen.Powell@Sun.COM 			return (retv);
770*9710SKen.Powell@Sun.COM 		}
771*9710SKen.Powell@Sun.COM 	}
772*9710SKen.Powell@Sun.COM 
7731676Sjpk 	optr = (uchar_t *)(ipha + 1);
7741676Sjpk 	hlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
7751676Sjpk 	sec_opt_len = opt_storage[IPOPT_OLEN];
7761676Sjpk 
7771676Sjpk 	if (hlen >= sec_opt_len) {
7781676Sjpk 		/* If no option is supposed to be there, make sure it's not */
7791676Sjpk 		if (sec_opt_len == 0 && hlen > 0 &&
7801676Sjpk 		    optr[IPOPT_OPTVAL] != IPOPT_COMSEC &&
7811676Sjpk 		    optr[IPOPT_OPTVAL] != IPOPT_SECURITY)
7821676Sjpk 			return (0);
7831676Sjpk 		/* if the option is there, it's always first */
7841676Sjpk 		if (sec_opt_len != 0 &&
7851676Sjpk 		    bcmp(opt_storage, optr, sec_opt_len) == 0)
7861676Sjpk 			return (0);
7871676Sjpk 	}
7881676Sjpk 
7891676Sjpk 	/*
7901676Sjpk 	 * If there is an option there, then it must be the wrong one; delete.
7911676Sjpk 	 */
7926596Skp158701 	if (hlen > 0) {
7936596Skp158701 		delta_remove = tsol_remove_secopt(ipha, MBLKL(mp));
7946596Skp158701 		mp->b_wptr += delta_remove;
7956596Skp158701 	}
7961676Sjpk 
7971676Sjpk 	/* Make sure we have room for the worst-case addition */
7981676Sjpk 	hlen = IPH_HDR_LENGTH(ipha) + opt_storage[IPOPT_OLEN];
7991676Sjpk 	hlen = (hlen + 3) & ~3;
8001676Sjpk 	if (hlen > IP_MAX_HDR_LENGTH)
8011676Sjpk 		hlen = IP_MAX_HDR_LENGTH;
8021676Sjpk 	hlen -= IPH_HDR_LENGTH(ipha);
8031676Sjpk 	if (mp->b_wptr + hlen > mp->b_datap->db_lim) {
8041676Sjpk 		int copylen;
8051676Sjpk 		mblk_t *new_mp;
8061676Sjpk 
8071676Sjpk 		/* allocate enough to be meaningful, but not *too* much */
8081676Sjpk 		copylen = MBLKL(mp);
8091676Sjpk 		if (copylen > 256)
8101676Sjpk 			copylen = 256;
8118778SErik.Nordmark@Sun.COM 		new_mp = allocb_tmpl(hlen + copylen +
8128778SErik.Nordmark@Sun.COM 		    (mp->b_rptr - mp->b_datap->db_base), mp);
8131676Sjpk 		if (new_mp == NULL)
8141676Sjpk 			return (ENOMEM);
8151676Sjpk 
8161676Sjpk 		/* keep the bias */
8171676Sjpk 		new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
8181676Sjpk 		new_mp->b_wptr = new_mp->b_rptr + copylen;
8191676Sjpk 		bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
8201676Sjpk 		new_mp->b_cont = mp;
8211676Sjpk 		if ((mp->b_rptr += copylen) >= mp->b_wptr) {
8221676Sjpk 			new_mp->b_cont = mp->b_cont;
8231676Sjpk 			freeb(mp);
8241676Sjpk 		}
8251676Sjpk 		*mpp = mp = new_mp;
8261676Sjpk 		ipha = (ipha_t *)mp->b_rptr;
8271676Sjpk 	}
8281676Sjpk 
8296596Skp158701 	delta_add = tsol_prepend_option(opt_storage, ipha, MBLKL(mp));
8306596Skp158701 	if (delta_add == -1)
8311676Sjpk 		goto param_prob;
8321676Sjpk 
8336596Skp158701 	ASSERT((mp->b_wptr + delta_add) <= DB_LIM(mp));
8346596Skp158701 	mp->b_wptr += delta_add;
8351676Sjpk 
8366596Skp158701 	adjust = delta_remove + delta_add;
8376596Skp158701 	adjust += ntohs(ipha->ipha_length);
8386596Skp158701 	ipha->ipha_length = htons(adjust);
8391676Sjpk 
8401676Sjpk 	return (0);
8411676Sjpk 
8421676Sjpk param_prob:
8431676Sjpk 	return (EINVAL);
8441676Sjpk }
8451676Sjpk 
8461676Sjpk /*
8471676Sjpk  * IPv6 HopOpt extension header for the label option layout:
8481676Sjpk  *	- One octet giving the type of the 'next extension header'
8491676Sjpk  *	- Header extension length in 8-byte words, not including the
8501676Sjpk  *	  1st 8 bytes, but including any pad bytes at the end.
8511676Sjpk  *	  Eg. A value of 2 means 16 bytes not including the 1st 8 bytes.
8521676Sjpk  *	- Followed by TLV encoded IPv6 label option. Option layout is
8531676Sjpk  *		* One octet, IP6OPT_LS
8541676Sjpk  *		* One octet option length in bytes of the option data following
8551676Sjpk  *		  the length, but not including any pad bytes at the end.
8561676Sjpk  *		* Four-octet DOI (IP6LS_DOI_V4)
8571676Sjpk  *		* One octet suboption, IP6LS_TT_V4
8581676Sjpk  *		* One octet suboption length in bytes of the suboption
8591676Sjpk  *		  following the suboption length, including the suboption
8601676Sjpk  *		  header length, but not including any pad bytes at the end.
8611676Sjpk  *	- Pad to make the extension header a multiple of 8 bytes.
8621676Sjpk  *
8631676Sjpk  * This function returns the contents of 'IPv6 option structure' in the above.
8641676Sjpk  * i.e starting from the IP6OPT_LS but not including the pad at the end.
8651676Sjpk  * The user must prepend two octets (either padding or next header / length)
8661676Sjpk  * and append padding out to the next 8 octet boundary.
8671676Sjpk  */
8681676Sjpk int
8691676Sjpk tsol_compute_label_v6(const cred_t *credp, const in6_addr_t *dst,
870*9710SKen.Powell@Sun.COM     uchar_t *opt_storage, ip_stack_t *ipst)
8711676Sjpk {
8721676Sjpk 	ts_label_t	*tsl;
8731676Sjpk 	uint_t		sec_opt_len;
8741676Sjpk 	uint32_t	doi;
8753448Sdh155122 	zoneid_t	zoneid, ip_zoneid;
8761676Sjpk 	ire_t		*ire, *sire;
8771676Sjpk 	tsol_ire_gw_secattr_t *attrp;
8781676Sjpk 
8798778SErik.Nordmark@Sun.COM 	ASSERT(credp != NULL);
8808778SErik.Nordmark@Sun.COM 
8811676Sjpk 	if (ip6opt_ls == 0)
8821676Sjpk 		return (EINVAL);
8831676Sjpk 
8841676Sjpk 	if (opt_storage != NULL)
8851676Sjpk 		opt_storage[IPOPT_OLEN] = 0;
8861676Sjpk 
8871676Sjpk 	if ((tsl = crgetlabel(credp)) == NULL)
8881676Sjpk 		return (0);
8891676Sjpk 
8901676Sjpk 	/* Always pass multicast */
8911676Sjpk 	if (IN6_IS_ADDR_MULTICAST(dst))
8921676Sjpk 		return (0);
8931676Sjpk 
8941676Sjpk 	zoneid = crgetzoneid(credp);
8951676Sjpk 
8961676Sjpk 	/*
8971676Sjpk 	 * Fill in a V6 label.  If a new format is added here, make certain
8981676Sjpk 	 * that the maximum size of this label is reflected in sys/tsol/tnet.h
8991676Sjpk 	 * as TSOL_MAX_IPV6_OPTION.
9001676Sjpk 	 */
901*9710SKen.Powell@Sun.COM 	if (tsl->tsl_flags & TSLF_UNLABELED) {
9021676Sjpk 		/*
903*9710SKen.Powell@Sun.COM 		 * The destination is unlabeled. Only add a label if the
904*9710SKen.Powell@Sun.COM 		 * destination is not broadcast/local/loopback address,
905*9710SKen.Powell@Sun.COM 		 * the destination is not on the same subnet, and the
906*9710SKen.Powell@Sun.COM 		 * next-hop gateway is labeled.
907*9710SKen.Powell@Sun.COM 		 *
908*9710SKen.Powell@Sun.COM 		 * For exclusive stacks we set the zoneid to zero to
909*9710SKen.Powell@Sun.COM 		 * operate as if we are in the global zone when
910*9710SKen.Powell@Sun.COM 		 * performing IRE lookups and conn_t comparisons.
9111676Sjpk 		 */
912*9710SKen.Powell@Sun.COM 		if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
913*9710SKen.Powell@Sun.COM 			ip_zoneid = GLOBAL_ZONEID;
914*9710SKen.Powell@Sun.COM 		else
915*9710SKen.Powell@Sun.COM 			ip_zoneid = zoneid;
916*9710SKen.Powell@Sun.COM 
9171676Sjpk 		sire = NULL;
9183448Sdh155122 		ire = ire_cache_lookup_v6(dst, ip_zoneid, tsl, ipst);
9191676Sjpk 
9201676Sjpk 		if (ire != NULL && (ire->ire_type & (IRE_LOCAL |
9211676Sjpk 		    IRE_LOOPBACK | IRE_INTERFACE)) != 0) {
9221676Sjpk 			IRE_REFRELE(ire);
9231676Sjpk 			return (0);
9241676Sjpk 		} else if (ire == NULL) {
9251676Sjpk 			ire = ire_ftable_lookup_v6(dst, NULL, NULL, 0, NULL,
9263448Sdh155122 			    &sire, ip_zoneid, 0, tsl, (MATCH_IRE_RECURSIVE |
9273448Sdh155122 			    MATCH_IRE_DEFAULT | MATCH_IRE_SECATTR), ipst);
9281676Sjpk 		}
9291676Sjpk 
9301676Sjpk 		/* no route to destination */
9311676Sjpk 		if (ire == NULL) {
932*9710SKen.Powell@Sun.COM 			DTRACE_PROBE3(
9331676Sjpk 			    tx__tnopt__log__info__labeling__routedst__v6,
934*9710SKen.Powell@Sun.COM 			    char *, "No route to unlabeled dest ip6(1) with "
935*9710SKen.Powell@Sun.COM 			    "creds(2).", in6_addr_t *, dst, cred_t *, credp);
936*9710SKen.Powell@Sun.COM 			return (EHOSTUNREACH);
9371676Sjpk 		}
9381676Sjpk 
9391676Sjpk 		/*
9401676Sjpk 		 * Prefix IRE from f-table lookup means that the destination
9411676Sjpk 		 * is not directly connected; check the next-hop attributes.
9421676Sjpk 		 */
9431676Sjpk 		if (sire != NULL) {
9441676Sjpk 			ASSERT(ire != NULL);
9451676Sjpk 			IRE_REFRELE(ire);
9461676Sjpk 			ire = sire;
9471676Sjpk 		}
9481676Sjpk 
949*9710SKen.Powell@Sun.COM 		/*
950*9710SKen.Powell@Sun.COM 		 * Return now if next hop gateway is unlabeled. There is
951*9710SKen.Powell@Sun.COM 		 * no need to generate a CIPSO option for this message.
952*9710SKen.Powell@Sun.COM 		 */
9531676Sjpk 		attrp = ire->ire_gw_secattr;
954*9710SKen.Powell@Sun.COM 		if (attrp == NULL || attrp->igsa_rhc == NULL ||
955*9710SKen.Powell@Sun.COM 		    attrp->igsa_rhc->rhc_tpc->tpc_tp.host_type == UNLABELED) {
9561676Sjpk 			IRE_REFRELE(ire);
957*9710SKen.Powell@Sun.COM 			return (0);
9581676Sjpk 		}
9591676Sjpk 		IRE_REFRELE(ire);
9601676Sjpk 	}
9611676Sjpk 
9621676Sjpk 	/* compute the CIPSO option */
9631676Sjpk 	if (opt_storage != NULL)
9641676Sjpk 		opt_storage += 8;
965*9710SKen.Powell@Sun.COM 	sec_opt_len = tsol2cipso_tt1(&tsl->tsl_label, opt_storage,
966*9710SKen.Powell@Sun.COM 	    tsl->tsl_doi);
9671676Sjpk 
9681676Sjpk 	if (sec_opt_len == 0) {
969*9710SKen.Powell@Sun.COM 		DTRACE_PROBE3(tx__tnopt__log__error__labeling__lostops__v6,
970*9710SKen.Powell@Sun.COM 		    char *, "options lack length for dest ip6(1) with "
971*9710SKen.Powell@Sun.COM 		    "creds(2).", in6_addr_t *, dst, cred_t *, credp);
9721676Sjpk 		return (EINVAL);
9731676Sjpk 	}
9741676Sjpk 
9751676Sjpk 	if (opt_storage == NULL)
9761676Sjpk 		return (0);
9771676Sjpk 
9781676Sjpk 	if (sec_opt_len < IP_MAX_OPT_LENGTH)
9791676Sjpk 		opt_storage[sec_opt_len] = IPOPT_EOL;
9801676Sjpk 
9811676Sjpk 	/*
9821676Sjpk 	 * Just in case the option length is odd, round it up to the next even
9831676Sjpk 	 * multiple.  The IPv6 option definition doesn't like odd numbers for
9841676Sjpk 	 * some reason.
9851676Sjpk 	 *
9861676Sjpk 	 * Length in the overall option header (IP6OPT_LS) does not include the
9871676Sjpk 	 * option header itself, but the length in the suboption does include
9881676Sjpk 	 * the suboption header.  Thus, when there's just one suboption, the
9891676Sjpk 	 * length in the option header is the suboption length plus 4 (for the
9901676Sjpk 	 * DOI value).
9911676Sjpk 	 */
9921676Sjpk 	opt_storage[-2] = IP6LS_TT_V4;
9931676Sjpk 	opt_storage[-1] = (sec_opt_len + 2 + 1) & ~1;
9941676Sjpk 	opt_storage[-8] = ip6opt_ls;
9951676Sjpk 	opt_storage[-7] = opt_storage[-1] + 4;
9961676Sjpk 	doi = htons(IP6LS_DOI_V4);
9971676Sjpk 	bcopy(&doi, opt_storage - 6, 4);
9981676Sjpk 
9991676Sjpk 	return (0);
10001676Sjpk }
10011676Sjpk 
10021676Sjpk /*
10031676Sjpk  * Locate the start of the IP6OPT_LS label option and return it.
10041676Sjpk  * Also return the start of the next non-pad option in after_secoptp.
10051676Sjpk  * Usually the label option is the first option at least when packets
10061676Sjpk  * are generated, but for generality we don't assume that on received packets.
10071676Sjpk  */
10081676Sjpk uchar_t *
10091676Sjpk tsol_find_secopt_v6(
10101676Sjpk     const uchar_t *ip6hbh,	/* Start of the hop-by-hop extension header */
10111676Sjpk     uint_t hbhlen,		/* Length of the hop-by-hop extension header */
10121676Sjpk     uchar_t **after_secoptp,	/* Non-pad option following the label option */
10131676Sjpk     boolean_t *hbh_needed)	/* Is hop-by-hop hdr needed w/o label */
10141676Sjpk {
10151676Sjpk 	uint_t	optlen;
10161676Sjpk 	uint_t	optused;
10171676Sjpk 	const uchar_t *optptr;
10181676Sjpk 	uchar_t	opt_type;
10191676Sjpk 	const uchar_t *secopt = NULL;
10201676Sjpk 
10211676Sjpk 	*hbh_needed = B_FALSE;
10221676Sjpk 	*after_secoptp = NULL;
10231676Sjpk 	optlen = hbhlen - 2;
10241676Sjpk 	optptr = ip6hbh + 2;
10251676Sjpk 	while (optlen != 0) {
10261676Sjpk 		opt_type = *optptr;
10271676Sjpk 		if (opt_type == IP6OPT_PAD1) {
10281676Sjpk 			optptr++;
10291676Sjpk 			optlen--;
10301676Sjpk 			continue;
10311676Sjpk 		}
10321676Sjpk 		if (optlen == 1)
10331676Sjpk 			break;
10341676Sjpk 		optused = 2 + optptr[1];
10351676Sjpk 		if (optused > optlen)
10361676Sjpk 			break;
10371676Sjpk 		/*
10381676Sjpk 		 * if we get here, ip6opt_ls can
10391676Sjpk 		 * not be 0 because it will always
10401676Sjpk 		 * match the IP6OPT_PAD1 above.
10411676Sjpk 		 * Therefore ip6opt_ls == 0 forces
10421676Sjpk 		 * this test to always fail here.
10431676Sjpk 		 */
10441676Sjpk 		if (opt_type == ip6opt_ls)
10451676Sjpk 			secopt = optptr;
10461676Sjpk 		else switch (opt_type) {
10471676Sjpk 		case IP6OPT_PADN:
10481676Sjpk 			break;
10491676Sjpk 		default:
10501676Sjpk 			/*
10511676Sjpk 			 * There is at least 1 option other than
10521676Sjpk 			 * the label option. So the hop-by-hop header is needed
10531676Sjpk 			 */
10541676Sjpk 			*hbh_needed = B_TRUE;
10551676Sjpk 			if (secopt != NULL) {
10561676Sjpk 				*after_secoptp = (uchar_t *)optptr;
10571676Sjpk 				return ((uchar_t *)secopt);
10581676Sjpk 			}
10591676Sjpk 			break;
10601676Sjpk 		}
10611676Sjpk 		optlen -= optused;
10621676Sjpk 		optptr += optused;
10631676Sjpk 	}
10641676Sjpk 	return ((uchar_t *)secopt);
10651676Sjpk }
10661676Sjpk 
10671676Sjpk /*
10681676Sjpk  * Remove the label option from the hop-by-hop options header if it exists.
10691676Sjpk  * 'buflen' is the total length of the packet typically b_wptr - b_rptr.
10701676Sjpk  * Header and data following the label option that is deleted are copied
10714564Swy83408  * (i.e. slid backward) to the right position, and returns the number
10724564Swy83408  * of bytes removed (as zero or negative number.)
10731676Sjpk  */
10741676Sjpk int
10751676Sjpk tsol_remove_secopt_v6(ip6_t *ip6h, int buflen)
10761676Sjpk {
10771676Sjpk 	uchar_t	*ip6hbh;	/* hop-by-hop header */
10781676Sjpk 	uint_t	hbhlen;		/* hop-by-hop extension header length */
10791676Sjpk 	uchar_t *secopt = NULL;
10801676Sjpk 	uchar_t *after_secopt;
10811676Sjpk 	uint_t	pad;
10821676Sjpk 	uint_t	delta;
10831676Sjpk 	boolean_t hbh_needed;
10841676Sjpk 
10851676Sjpk 	/*
10861676Sjpk 	 * hop-by-hop extension header must appear first, if it does not
10871676Sjpk 	 * exist, there is no label option.
10881676Sjpk 	 */
10891676Sjpk 	if (ip6h->ip6_nxt != IPPROTO_HOPOPTS)
10901676Sjpk 		return (0);
10911676Sjpk 
10921676Sjpk 	ip6hbh = (uchar_t *)&ip6h[1];
10931676Sjpk 	hbhlen = (ip6hbh[1] + 1) << 3;
10941676Sjpk 	/*
10951676Sjpk 	 * Locate the start of the label option if it exists and the end
10961676Sjpk 	 * of the label option including pads if any.
10971676Sjpk 	 */
10981676Sjpk 	secopt = tsol_find_secopt_v6(ip6hbh, hbhlen, &after_secopt,
10991676Sjpk 	    &hbh_needed);
11001676Sjpk 	if (secopt == NULL)
11011676Sjpk 		return (0);
11021676Sjpk 	if (!hbh_needed) {
11031676Sjpk 		uchar_t	next_hdr;
11041676Sjpk 		/*
11051676Sjpk 		 * The label option was the only option in the hop-by-hop
11061676Sjpk 		 * header. We don't need the hop-by-hop header itself any
11071676Sjpk 		 * longer.
11081676Sjpk 		 */
11091676Sjpk 		next_hdr = ip6hbh[0];
11101676Sjpk 		ovbcopy(ip6hbh + hbhlen, ip6hbh,
11111676Sjpk 		    buflen - (IPV6_HDR_LEN + hbhlen));
11122776Skp158701 		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - hbhlen);
11131676Sjpk 		ip6h->ip6_nxt = next_hdr;
11144564Swy83408 		return (-hbhlen);
11151676Sjpk 	}
11161676Sjpk 
11171676Sjpk 	if (after_secopt == NULL) {
11181676Sjpk 		/* There is no option following the label option */
11191676Sjpk 		after_secopt = ip6hbh + hbhlen;
11201676Sjpk 	}
11211676Sjpk 
11221676Sjpk 	/*
11231676Sjpk 	 * After deleting the label option, we need to slide the headers
11241676Sjpk 	 * and data back, while still maintaining the same alignment (module 8)
11251676Sjpk 	 * for the other options. So we slide the headers and data back only
11261676Sjpk 	 * by an integral multiple of 8 bytes, and fill the remaining bytes
11271676Sjpk 	 * with pads.
11281676Sjpk 	 */
11291676Sjpk 	delta = after_secopt - secopt;
11301676Sjpk 	pad = delta % 8;
11311676Sjpk 	if (pad == 1) {
11321676Sjpk 		secopt[0] = IP6OPT_PAD1;
11331676Sjpk 	} else if (pad > 1) {
11341676Sjpk 		secopt[0] = IP6OPT_PADN;
11351676Sjpk 		secopt[1] = pad - 2;
11361676Sjpk 		if (pad > 2)
11371676Sjpk 			bzero(&secopt[2], pad - 2);
11381676Sjpk 	}
11391676Sjpk 	secopt += pad;
11401676Sjpk 	delta -= pad;
11411676Sjpk 	ovbcopy(after_secopt, secopt,
11421676Sjpk 	    (uchar_t *)ip6h + buflen - after_secopt);
11431676Sjpk 	ip6hbh[1] -= delta/8;
11442776Skp158701 	ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - delta);
11451676Sjpk 
11464564Swy83408 	return (-delta);
11471676Sjpk }
11481676Sjpk 
11491676Sjpk /*
11501676Sjpk  * 'optbuf' contains a CIPSO label embedded in an IPv6 hop-by-hop option,
11511676Sjpk  * starting with the IP6OPT_LS option type. The format of this hop-by-hop
11521676Sjpk  * option is described in the block comment above tsol_compute_label_v6.
11531676Sjpk  * This function prepends this hop-by-hop option before any other hop-by-hop
11541676Sjpk  * options in the hop-by-hop header if one already exists, else a new
11551676Sjpk  * hop-by-hop header is created and stuffed into the packet following
11561676Sjpk  * the IPv6 header. 'buflen' is the total length of the packet i.e.
11571676Sjpk  * b_wptr - b_rptr. The caller ensures that there is enough space for the
11581676Sjpk  * extra option being added. Header and data following the position where
11591676Sjpk  * the label option is inserted are copied (i.e. slid forward) to the right
11601676Sjpk  * position.
11611676Sjpk  */
11621676Sjpk int
11631676Sjpk tsol_prepend_option_v6(uchar_t *optbuf, ip6_t *ip6h, int buflen)
11641676Sjpk {
11651676Sjpk 	/*
11661676Sjpk 	 * rawlen is the length of the label option in bytes, not including
11671676Sjpk 	 * any pads, starting from the IP6OPT_LS (option type) byte.
11681676Sjpk 	 */
11691676Sjpk 	uint_t	rawlen;
11701676Sjpk 
11711676Sjpk 	uint_t	optlen;		/* rawlen rounded to an 8 byte multiple */
11721676Sjpk 	uchar_t	*ip6hbh;	/* start of the hop-by-hop extension header */
11731676Sjpk 	uint_t	hbhlen;		/* Length of the hop-by-hop extension header */
11741676Sjpk 	uint_t	pad_len;
11751676Sjpk 	uchar_t	*pad_position;
11761676Sjpk 	int	delta;		/* Actual number of bytes inserted */
11771676Sjpk 
11781676Sjpk 	rawlen = optbuf[1] + 2;	/* Add 2 for the option type, option length */
11791676Sjpk 	ip6hbh = (uchar_t *)&ip6h[1];
11801676Sjpk 	if (ip6h->ip6_nxt == IPPROTO_HOPOPTS) {
11811676Sjpk 		/*
11821676Sjpk 		 * There is a hop-by-hop header present already. In order to
11831676Sjpk 		 * preserve the alignment of the other options at the existing
11841676Sjpk 		 * value (modulo 8) we need to pad the label option to a
11851676Sjpk 		 * multiple of 8 bytes before prepending it to the other
11861676Sjpk 		 * options. Slide the extension headers and data forward to
11871676Sjpk 		 * accomodate the label option at the start of the hop-by-hop
11881676Sjpk 		 * header
11891676Sjpk 		 */
11901676Sjpk 		delta = optlen = (rawlen + 7) & ~7;
11911676Sjpk 		pad_len = optlen - rawlen;
11921676Sjpk 		pad_position = ip6hbh + 2 + rawlen;
11931676Sjpk 		ovbcopy(ip6hbh + 2, ip6hbh + 2 + optlen,
11941676Sjpk 		    buflen - (IPV6_HDR_LEN + 2));
11951676Sjpk 		/*
11961676Sjpk 		 * Bump up the hop-by-hop extension header length by
11971676Sjpk 		 * the number of 8-byte words added
11981676Sjpk 		 */
11991676Sjpk 		optlen >>= 3;
12001676Sjpk 		if (ip6hbh[1] + optlen > 255)
12011676Sjpk 			return (-1);
12021676Sjpk 		ip6hbh[1] += optlen;
12031676Sjpk 	} else {
12041676Sjpk 		/*
12051676Sjpk 		 * There is no hop-by-hop header in the packet. Construct a
12061676Sjpk 		 * new Hop-by-hop extension header (a multiple of 8 bytes).
12071676Sjpk 		 * Slide any other extension headers and data forward to
12081676Sjpk 		 * accomodate this hop-by-hop header
12091676Sjpk 		 */
12101676Sjpk 		delta = hbhlen = (2 + rawlen + 7) & ~7; /* +2 for nxthdr, len */
12111676Sjpk 		pad_len = hbhlen - (2 + rawlen);
12121676Sjpk 		pad_position = ip6hbh + 2 + rawlen;
12131676Sjpk 		ovbcopy(ip6hbh, ip6hbh + hbhlen, buflen - IPV6_HDR_LEN);
12141676Sjpk 		ip6hbh[0] = ip6h->ip6_nxt;
12151676Sjpk 		/*
12161676Sjpk 		 * hop-by-hop extension header length in 8-byte words, not
12171676Sjpk 		 * including the 1st 8 bytes of the hop-by-hop header.
12181676Sjpk 		 */
12191676Sjpk 		ip6hbh[1] = (hbhlen >> 3) - 1;
12201676Sjpk 		ip6h->ip6_nxt = IPPROTO_HOPOPTS;
12211676Sjpk 	}
12221676Sjpk 	/*
12231676Sjpk 	 * Copy the label option into the hop-by-hop header and insert any
12241676Sjpk 	 * needed pads
12251676Sjpk 	 */
12261676Sjpk 	bcopy(optbuf, ip6hbh + 2, rawlen);
12271676Sjpk 	if (pad_len == 1) {
12281676Sjpk 		pad_position[0] = IP6OPT_PAD1;
12291676Sjpk 	} else if (pad_len > 1) {
12301676Sjpk 		pad_position[0] = IP6OPT_PADN;
12311676Sjpk 		pad_position[1] = pad_len - 2;
12321676Sjpk 		if (pad_len > 2)
12331676Sjpk 			bzero(pad_position + 2, pad_len - 2);
12341676Sjpk 	}
12352776Skp158701 	ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) + delta);
12361676Sjpk 	return (delta);
12371676Sjpk }
12381676Sjpk 
12391676Sjpk /*
12401676Sjpk  * tsol_check_label_v6()
12411676Sjpk  *
12421676Sjpk  * This routine computes the IP label that should be on the packet based on the
12431676Sjpk  * connection and destination information.  It's called only by the IP
12441676Sjpk  * forwarding logic, because all internal modules atop IP know how to generate
12451676Sjpk  * their own labels.
12461676Sjpk  *
12471676Sjpk  * Returns:
12481676Sjpk  *      0		Label on packet was already correct
1249*9710SKen.Powell@Sun.COM  *      EACCES		The packet failed the remote host accreditation.
12501676Sjpk  *      ENOMEM		Memory allocation failure.
12511676Sjpk  */
12521676Sjpk int
12536596Skp158701 tsol_check_label_v6(const cred_t *credp, mblk_t **mpp, boolean_t isexempt,
1254*9710SKen.Powell@Sun.COM     ip_stack_t *ipst, pid_t pid)
12551676Sjpk {
12561676Sjpk 	mblk_t *mp = *mpp;
12571676Sjpk 	ip6_t  *ip6h;
1258*9710SKen.Powell@Sun.COM 	cred_t *effective_cred;
12591676Sjpk 	/*
12601676Sjpk 	 * Label option length is limited to IP_MAX_OPT_LENGTH for
12611676Sjpk 	 * symmetry with IPv4. Can be relaxed if needed
12621676Sjpk 	 */
12631676Sjpk 	uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];
12641676Sjpk 	uint_t hlen;
12651676Sjpk 	uint_t sec_opt_len; /* label option length not including type, len */
12666596Skp158701 	int delta_remove = 0, delta_add;
12671676Sjpk 	int retv;
12681676Sjpk 	uchar_t	*after_secopt;
12691676Sjpk 	uchar_t	*secopt = NULL;
12701676Sjpk 	uchar_t	*ip6hbh;
12711676Sjpk 	uint_t	hbhlen;
12721676Sjpk 	boolean_t hbh_needed;
12731676Sjpk 
1274*9710SKen.Powell@Sun.COM 	/*
1275*9710SKen.Powell@Sun.COM 	 * Verify the destination is allowed to receive packets at
1276*9710SKen.Powell@Sun.COM 	 * the security label of the message data. check_dest()
1277*9710SKen.Powell@Sun.COM 	 * may create a new effective cred with a modified label
1278*9710SKen.Powell@Sun.COM 	 * or label flags. Apply any such cred to the message block
1279*9710SKen.Powell@Sun.COM 	 * for use in future routing decisions.
1280*9710SKen.Powell@Sun.COM 	 */
12811676Sjpk 	ip6h = (ip6_t *)mp->b_rptr;
1282*9710SKen.Powell@Sun.COM 	retv = tsol_check_dest(credp, &ip6h->ip6_dst, IPV6_VERSION,
1283*9710SKen.Powell@Sun.COM 	    isexempt, &effective_cred);
12841676Sjpk 	if (retv != 0)
12851676Sjpk 		return (retv);
12861676Sjpk 
1287*9710SKen.Powell@Sun.COM 	/*
1288*9710SKen.Powell@Sun.COM 	 * Calculate the security label to be placed in the text
1289*9710SKen.Powell@Sun.COM 	 * of the message (if any).
1290*9710SKen.Powell@Sun.COM 	 */
1291*9710SKen.Powell@Sun.COM 	if (effective_cred != NULL) {
1292*9710SKen.Powell@Sun.COM 		if ((retv = tsol_compute_label_v6(effective_cred,
1293*9710SKen.Powell@Sun.COM 		    &ip6h->ip6_dst, opt_storage, ipst)) != 0) {
1294*9710SKen.Powell@Sun.COM 			crfree(effective_cred);
1295*9710SKen.Powell@Sun.COM 			return (retv);
1296*9710SKen.Powell@Sun.COM 		}
1297*9710SKen.Powell@Sun.COM 		mblk_setcred(mp, effective_cred, pid);
1298*9710SKen.Powell@Sun.COM 		crfree(effective_cred);
1299*9710SKen.Powell@Sun.COM 	} else {
1300*9710SKen.Powell@Sun.COM 		if ((retv = tsol_compute_label_v6(credp,
1301*9710SKen.Powell@Sun.COM 		    &ip6h->ip6_dst, opt_storage, ipst)) != 0)
1302*9710SKen.Powell@Sun.COM 			return (retv);
1303*9710SKen.Powell@Sun.COM 	}
1304*9710SKen.Powell@Sun.COM 
13051676Sjpk 	sec_opt_len = opt_storage[1];
13061676Sjpk 
13071676Sjpk 	if (ip6h->ip6_nxt == IPPROTO_HOPOPTS) {
13081676Sjpk 		ip6hbh = (uchar_t *)&ip6h[1];
13091676Sjpk 		hbhlen = (ip6hbh[1] + 1) << 3;
13101676Sjpk 		secopt = tsol_find_secopt_v6(ip6hbh, hbhlen, &after_secopt,
13111676Sjpk 		    &hbh_needed);
13121676Sjpk 	}
13131676Sjpk 
13141676Sjpk 	if (sec_opt_len == 0 && secopt == NULL) {
13151676Sjpk 		/*
13161676Sjpk 		 * The packet is not supposed to have a label, and it
13171676Sjpk 		 * does not have one currently
13181676Sjpk 		 */
13191676Sjpk 		return (0);
13201676Sjpk 	}
13211676Sjpk 	if (secopt != NULL && sec_opt_len != 0 &&
13221676Sjpk 	    (bcmp(opt_storage, secopt, sec_opt_len + 2) == 0)) {
13231676Sjpk 		/* The packet has the correct label already */
13241676Sjpk 		return (0);
13251676Sjpk 	}
13261676Sjpk 
13271676Sjpk 	/*
13281676Sjpk 	 * If there is an option there, then it must be the wrong one; delete.
13291676Sjpk 	 */
13306596Skp158701 	if (secopt != NULL) {
13316596Skp158701 		delta_remove = tsol_remove_secopt_v6(ip6h, MBLKL(mp));
13326596Skp158701 		mp->b_wptr += delta_remove;
13336596Skp158701 	}
13341676Sjpk 
13351676Sjpk 	/*
13361676Sjpk 	 * Make sure we have room for the worst-case addition. Add 2 bytes for
13371676Sjpk 	 * the hop-by-hop ext header's next header and length fields. Add
13381676Sjpk 	 * another 2 bytes for the label option type, len and then round
13391676Sjpk 	 * up to the next 8-byte multiple.
13401676Sjpk 	 */
13411676Sjpk 	hlen = (4 + sec_opt_len + 7) & ~7;
13421676Sjpk 	if (mp->b_wptr + hlen > mp->b_datap->db_lim) {
13431676Sjpk 		int copylen;
13441676Sjpk 		mblk_t *new_mp;
13451676Sjpk 		uint16_t hdr_len;
13461676Sjpk 
13471676Sjpk 		hdr_len = ip_hdr_length_v6(mp, ip6h);
13481676Sjpk 		/*
13491676Sjpk 		 * Allocate enough to be meaningful, but not *too* much.
13501676Sjpk 		 * Also all the IPv6 extension headers must be in the same mblk
13511676Sjpk 		 */
13521676Sjpk 		copylen = MBLKL(mp);
13531676Sjpk 		if (copylen > 256)
13541676Sjpk 			copylen = 256;
13551676Sjpk 		if (copylen < hdr_len)
13561676Sjpk 			copylen = hdr_len;
13578778SErik.Nordmark@Sun.COM 		new_mp = allocb_tmpl(hlen + copylen +
13588778SErik.Nordmark@Sun.COM 		    (mp->b_rptr - mp->b_datap->db_base), mp);
13591676Sjpk 		if (new_mp == NULL)
13601676Sjpk 			return (ENOMEM);
13611676Sjpk 
13621676Sjpk 		/* keep the bias */
13631676Sjpk 		new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
13641676Sjpk 		new_mp->b_wptr = new_mp->b_rptr + copylen;
13651676Sjpk 		bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
13661676Sjpk 		new_mp->b_cont = mp;
13671676Sjpk 		if ((mp->b_rptr += copylen) >= mp->b_wptr) {
13681676Sjpk 			new_mp->b_cont = mp->b_cont;
13691676Sjpk 			freeb(mp);
13701676Sjpk 		}
13711676Sjpk 		*mpp = mp = new_mp;
13721676Sjpk 		ip6h = (ip6_t *)mp->b_rptr;
13731676Sjpk 	}
13741676Sjpk 
13756596Skp158701 	delta_add = tsol_prepend_option_v6(opt_storage, ip6h, MBLKL(mp));
13766596Skp158701 	if (delta_add == -1)
13771676Sjpk 		goto param_prob;
13781676Sjpk 
13796596Skp158701 	ASSERT(mp->b_wptr + delta_add <= DB_LIM(mp));
13806596Skp158701 	mp->b_wptr += delta_add;
13811676Sjpk 
13821676Sjpk 	return (0);
13831676Sjpk 
13841676Sjpk param_prob:
13851676Sjpk 	return (EINVAL);
13861676Sjpk }
13871676Sjpk 
13881676Sjpk /*
13891676Sjpk  * Update the given IPv6 "sticky options" structure to contain the provided
13901676Sjpk  * label, which is encoded as an IPv6 option.  Existing label is removed if
13911676Sjpk  * necessary, and storage is allocated/freed/resized.
13921676Sjpk  *
13931676Sjpk  * Returns 0 on success, errno on failure.
13941676Sjpk  */
13951676Sjpk int
13961676Sjpk tsol_update_sticky(ip6_pkt_t *ipp, uint_t *labellen, const uchar_t *labelopt)
13971676Sjpk {
13981676Sjpk 	int rawlen, optlen, newlen;
13991676Sjpk 	uchar_t *newopts;
14001676Sjpk 
14011676Sjpk 	/*
14021676Sjpk 	 * rawlen is the size of the IPv6 label to be inserted from labelopt.
14031676Sjpk 	 * optlen is the total length of that option, including any necessary
14041676Sjpk 	 * headers and padding.  newlen is the new size of the total hop-by-hop
14051676Sjpk 	 * options buffer, including user options.
14061676Sjpk 	 */
14072283Skp158701 	ASSERT(*labellen <= ipp->ipp_hopoptslen);
14082283Skp158701 	ASSERT((ipp->ipp_hopopts == NULL && ipp->ipp_hopoptslen == 0) ||
14092283Skp158701 	    (ipp->ipp_hopopts != NULL && ipp->ipp_hopoptslen != 0));
14102283Skp158701 
14111676Sjpk 	if ((rawlen = labelopt[1]) != 0) {
14121676Sjpk 		rawlen += 2;	/* add in header size */
14131676Sjpk 		optlen = (2 + rawlen + 7) & ~7;
14141676Sjpk 	} else {
14151676Sjpk 		optlen = 0;
14161676Sjpk 	}
14171676Sjpk 	newlen = ipp->ipp_hopoptslen + optlen - *labellen;
14182283Skp158701 	if (newlen == 0 && ipp->ipp_hopopts != NULL) {
14192283Skp158701 		/* Deleting all existing hop-by-hop options */
14202283Skp158701 		kmem_free(ipp->ipp_hopopts, ipp->ipp_hopoptslen);
14212283Skp158701 		ipp->ipp_hopopts = NULL;
14222283Skp158701 		ipp->ipp_fields &= ~IPPF_HOPOPTS;
14232283Skp158701 	} else if (optlen != *labellen) {
14242283Skp158701 		/* If the label not same size as last time, then reallocate */
14251676Sjpk 		if (newlen > IP6_MAX_OPT_LENGTH)
14261676Sjpk 			return (EHOSTUNREACH);
14271676Sjpk 		newopts = kmem_alloc(newlen, KM_NOSLEEP);
14281676Sjpk 		if (newopts == NULL)
14291676Sjpk 			return (ENOMEM);
14301676Sjpk 		/*
14311676Sjpk 		 * If the user has hop-by-hop stickyoptions set, then copy his
14321676Sjpk 		 * options in after the security label.
14331676Sjpk 		 */
14341676Sjpk 		if (ipp->ipp_hopoptslen > *labellen) {
14351676Sjpk 			bcopy(ipp->ipp_hopopts + *labellen, newopts + optlen,
14361676Sjpk 			    ipp->ipp_hopoptslen - *labellen);
14371676Sjpk 			/*
14381676Sjpk 			 * Stomp out any header gunk here - this was the
14391676Sjpk 			 * previous next-header and option length field.
14401676Sjpk 			 */
14411676Sjpk 			newopts[optlen] = IP6OPT_PADN;
14421676Sjpk 			newopts[optlen + 1] = 0;
14431676Sjpk 		}
14441676Sjpk 		if (ipp->ipp_hopopts != NULL)
14451676Sjpk 			kmem_free(ipp->ipp_hopopts, ipp->ipp_hopoptslen);
14461676Sjpk 		ipp->ipp_hopopts = (ip6_hbh_t *)newopts;
14471676Sjpk 	}
14481676Sjpk 	ipp->ipp_hopoptslen = newlen;
14491676Sjpk 	*labellen = optlen;
14501676Sjpk 
14511676Sjpk 	newopts = (uchar_t *)ipp->ipp_hopopts;
14521676Sjpk 
14531676Sjpk 	/* If there are any options, then fix up reported length */
14541676Sjpk 	if (newlen > 0) {
14551676Sjpk 		newopts[1] = (newlen + 7) / 8 - 1;
14561676Sjpk 		ipp->ipp_fields |= IPPF_HOPOPTS;
14571676Sjpk 	}
14581676Sjpk 
14591676Sjpk 	/* If there's a label, then insert it now */
14601676Sjpk 	if (optlen > 0) {
14611676Sjpk 		/* skip next-header and length fields */
14621676Sjpk 		newopts += 2;
14631676Sjpk 		bcopy(labelopt, newopts, rawlen);
14641676Sjpk 		newopts += rawlen;
14651676Sjpk 		/* make sure padding comes out right */
14661676Sjpk 		optlen -= 2 + rawlen;
14671676Sjpk 		if (optlen == 1) {
14681676Sjpk 			newopts[0] = IP6OPT_PAD1;
14691676Sjpk 		} else if (optlen > 1) {
14701676Sjpk 			newopts[0] = IP6OPT_PADN;
14711676Sjpk 			optlen -=  2;
14721676Sjpk 			newopts[1] = optlen;
14731676Sjpk 			if (optlen > 0)
14741676Sjpk 				bzero(newopts + 2, optlen);
14751676Sjpk 		}
14761676Sjpk 	}
14771676Sjpk 	return (0);
14781676Sjpk }
14791676Sjpk 
14801676Sjpk int
14811676Sjpk tsol_update_options(uchar_t **opts, uint_t *totlen, uint_t *labellen,
14821676Sjpk     const uchar_t *labelopt)
14831676Sjpk {
14841676Sjpk 	int optlen, newlen;
14851676Sjpk 	uchar_t *newopts;
14861676Sjpk 
14871676Sjpk 	optlen = (labelopt[IPOPT_OLEN] + 3) & ~3;
14881676Sjpk 	newlen = *totlen + optlen - *labellen;
14891676Sjpk 	if (optlen > *labellen) {
14901676Sjpk 		if (newlen > IP_MAX_OPT_LENGTH)
14911676Sjpk 			return (EHOSTUNREACH);
14921676Sjpk 		newopts = (uchar_t *)mi_alloc(newlen, BPRI_HI);
14931676Sjpk 		if (newopts == NULL)
14941676Sjpk 			return (ENOMEM);
14951676Sjpk 		if (*totlen > *labellen) {
14961676Sjpk 			bcopy(*opts + *labellen, newopts + optlen,
14971676Sjpk 			    *totlen - *labellen);
14981676Sjpk 		}
14991676Sjpk 		if (*opts != NULL)
15001676Sjpk 			mi_free((char *)*opts);
15011676Sjpk 		*opts = newopts;
15021676Sjpk 	} else if (optlen < *labellen) {
15031676Sjpk 		if (newlen == 0 && *opts != NULL) {
15041676Sjpk 			mi_free((char *)*opts);
15051676Sjpk 			*opts = NULL;
15061676Sjpk 		}
15071676Sjpk 		if (*totlen > *labellen) {
15081676Sjpk 			ovbcopy(*opts + *labellen, *opts + optlen,
15091676Sjpk 			    *totlen - *labellen);
15101676Sjpk 		}
15111676Sjpk 	}
15121676Sjpk 	*totlen = newlen;
15131676Sjpk 	*labellen = optlen;
15141676Sjpk 	if (optlen > 0) {
15151676Sjpk 		newopts = *opts;
15161676Sjpk 		bcopy(labelopt, newopts, optlen);
15171676Sjpk 		/* check if there are user-supplied options that follow */
15181676Sjpk 		if (optlen < newlen) {
15191676Sjpk 			/* compute amount of embedded alignment needed */
15201676Sjpk 			optlen -= newopts[IPOPT_OLEN];
15211676Sjpk 			newopts += newopts[IPOPT_OLEN];
15221676Sjpk 			while (--optlen >= 0)
15231676Sjpk 				*newopts++ = IPOPT_NOP;
15241676Sjpk 		} else if (optlen != newopts[IPOPT_OLEN]) {
15251676Sjpk 			/*
15261676Sjpk 			 * The label option is the only option and it is
15271676Sjpk 			 * not a multiple of 4 bytes.
15281676Sjpk 			 */
15291676Sjpk 			optlen -= newopts[IPOPT_OLEN];
15301676Sjpk 			newopts += newopts[IPOPT_OLEN];
15311676Sjpk 			while (--optlen >= 0)
15321676Sjpk 				*newopts++ = IPOPT_EOL;
15331676Sjpk 		}
15341676Sjpk 	}
15351676Sjpk 	return (0);
15361676Sjpk }
15371676Sjpk 
15381676Sjpk /*
15391676Sjpk  * This does the bulk of the processing for setting IPPROTO_IP {T_,}IP_OPTIONS.
15401676Sjpk  */
15411676Sjpk boolean_t
15421676Sjpk tsol_option_set(uchar_t **opts, uint_t *optlen, uint_t labellen,
15431676Sjpk     const uchar_t *useropts, uint_t userlen)
15441676Sjpk {
15451676Sjpk 	int newlen;
15461676Sjpk 	uchar_t *newopts;
15471676Sjpk 
15481676Sjpk 	newlen = userlen + labellen;
15491676Sjpk 	if (newlen > *optlen) {
15501676Sjpk 		/* need more room */
15511676Sjpk 		newopts = (uchar_t *)mi_alloc(newlen, BPRI_HI);
15521676Sjpk 		if (newopts == NULL)
15531720Spwernau 			return (B_FALSE);
15541676Sjpk 		/*
15551676Sjpk 		 * The supplied *opts can't be NULL in this case,
15561676Sjpk 		 * since there's an existing label.
15571676Sjpk 		 */
15581676Sjpk 		if (labellen > 0)
15591676Sjpk 			bcopy(*opts, newopts, labellen);
15601676Sjpk 		if (*opts != NULL)
15611676Sjpk 			mi_free((char *)*opts);
15621676Sjpk 		*opts = newopts;
15631676Sjpk 	}
15641676Sjpk 
15651676Sjpk 	if (newlen == 0) {
15661676Sjpk 		/* special case -- no remaining IP options at all */
15671676Sjpk 		if (*opts != NULL) {
15681676Sjpk 			mi_free((char *)*opts);
15691676Sjpk 			*opts = NULL;
15701676Sjpk 		}
15711676Sjpk 	} else if (userlen > 0) {
15721676Sjpk 		/* merge in the user's options */
15731676Sjpk 		newopts = *opts;
15741676Sjpk 		if (labellen > 0) {
15751676Sjpk 			int extra = labellen - newopts[IPOPT_OLEN];
15761676Sjpk 
15771676Sjpk 			newopts += newopts[IPOPT_OLEN];
15781676Sjpk 			while (--extra >= 0)
15791676Sjpk 				*newopts++ = IPOPT_NOP;
15801676Sjpk 		}
15811676Sjpk 		bcopy(useropts, newopts, userlen);
15821676Sjpk 	}
15831676Sjpk 
15841676Sjpk 	*optlen = newlen;
15851720Spwernau 	return (B_TRUE);
15861676Sjpk }
1587