xref: /onnv-gate/usr/src/uts/common/inet/ip/icmp.c (revision 8778:b4169d2ab299)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51289Sja97890  * Common Development and Distribution License (the "License").
61289Sja97890  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
228477SRao.Shoaib@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate /* Copyright (c) 1990 Mentat Inc. */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #include <sys/types.h>
280Sstevel@tonic-gate #include <sys/stream.h>
290Sstevel@tonic-gate #include <sys/stropts.h>
300Sstevel@tonic-gate #include <sys/strlog.h>
310Sstevel@tonic-gate #include <sys/strsun.h>
320Sstevel@tonic-gate #define	_SUN_TPI_VERSION 2
330Sstevel@tonic-gate #include <sys/tihdr.h>
340Sstevel@tonic-gate #include <sys/timod.h>
350Sstevel@tonic-gate #include <sys/ddi.h>
360Sstevel@tonic-gate #include <sys/sunddi.h>
371676Sjpk #include <sys/strsubr.h>
380Sstevel@tonic-gate #include <sys/cmn_err.h>
390Sstevel@tonic-gate #include <sys/debug.h>
400Sstevel@tonic-gate #include <sys/kmem.h>
410Sstevel@tonic-gate #include <sys/policy.h>
421676Sjpk #include <sys/priv.h>
430Sstevel@tonic-gate #include <sys/zone.h>
441673Sgt145670 #include <sys/time.h>
450Sstevel@tonic-gate 
468348SEric.Yu@Sun.COM #include <sys/sockio.h>
470Sstevel@tonic-gate #include <sys/socket.h>
488348SEric.Yu@Sun.COM #include <sys/socketvar.h>
490Sstevel@tonic-gate #include <sys/isa_defs.h>
500Sstevel@tonic-gate #include <sys/suntpi.h>
510Sstevel@tonic-gate #include <sys/xti_inet.h>
523448Sdh155122 #include <sys/netstack.h>
530Sstevel@tonic-gate 
540Sstevel@tonic-gate #include <net/route.h>
550Sstevel@tonic-gate #include <net/if.h>
560Sstevel@tonic-gate 
570Sstevel@tonic-gate #include <netinet/in.h>
580Sstevel@tonic-gate #include <netinet/ip6.h>
590Sstevel@tonic-gate #include <netinet/icmp6.h>
600Sstevel@tonic-gate #include <inet/common.h>
610Sstevel@tonic-gate #include <inet/ip.h>
620Sstevel@tonic-gate #include <inet/ip6.h>
638348SEric.Yu@Sun.COM #include <inet/proto_set.h>
640Sstevel@tonic-gate #include <inet/nd.h>
650Sstevel@tonic-gate #include <inet/optcom.h>
660Sstevel@tonic-gate #include <inet/snmpcom.h>
670Sstevel@tonic-gate #include <inet/kstatcom.h>
680Sstevel@tonic-gate #include <inet/rawip_impl.h>
690Sstevel@tonic-gate 
700Sstevel@tonic-gate #include <netinet/ip_mroute.h>
710Sstevel@tonic-gate #include <inet/tcp.h>
720Sstevel@tonic-gate #include <net/pfkeyv2.h>
730Sstevel@tonic-gate #include <inet/ipsec_info.h>
740Sstevel@tonic-gate #include <inet/ipclassifier.h>
750Sstevel@tonic-gate 
761676Sjpk #include <sys/tsol/label.h>
771676Sjpk #include <sys/tsol/tnet.h>
781676Sjpk 
793318Srshoaib #include <inet/ip_ire.h>
803318Srshoaib #include <inet/ip_if.h>
813318Srshoaib 
823318Srshoaib #include <inet/ip_impl.h>
838348SEric.Yu@Sun.COM #include <sys/disp.h>
843318Srshoaib 
/*
 * Synchronization notes:
 *
 * RAWIP is MT and uses the usual kernel synchronization primitives. There is
 * one lock, which is icmp_rwlock. We also use conn_lock when updating things
 * which affect the IP classifier lookup.
 * The lock order is icmp_rwlock -> conn_lock.
 *
 * The icmp_rwlock:
 * This protects most of the other fields in the icmp_t. The exact list of
 * fields which are protected by each of the above locks is documented in
 * the icmp_t structure definition.
 *
 * Plumbing notes:
 * ICMP is always a device driver. For compatibility with mibopen() code
 * it is possible to I_PUSH "icmp", but that results in pushing a passthrough
 * dummy module.
 */
1030Sstevel@tonic-gate 
1040Sstevel@tonic-gate static void	icmp_addr_req(queue_t *q, mblk_t *mp);
1058348SEric.Yu@Sun.COM static void	icmp_tpi_bind(queue_t *q, mblk_t *mp);
1068348SEric.Yu@Sun.COM static int	icmp_bind_proto(conn_t *connp);
1075240Snordmark static int	icmp_build_hdrs(icmp_t *icmp);
1080Sstevel@tonic-gate static void	icmp_capability_req(queue_t *q, mblk_t *mp);
1098348SEric.Yu@Sun.COM static int	icmp_close(queue_t *q, int flags);
1108348SEric.Yu@Sun.COM static void	icmp_tpi_connect(queue_t *q, mblk_t *mp);
1118348SEric.Yu@Sun.COM static void	icmp_tpi_disconnect(queue_t *q, mblk_t *mp);
1120Sstevel@tonic-gate static void	icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
1130Sstevel@tonic-gate 		    int sys_error);
1140Sstevel@tonic-gate static void	icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
1150Sstevel@tonic-gate 		    t_scalar_t t_error, int sys_error);
1168348SEric.Yu@Sun.COM static void	icmp_icmp_error(conn_t *connp, mblk_t *mp);
1178348SEric.Yu@Sun.COM static void	icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp);
1180Sstevel@tonic-gate static void	icmp_info_req(queue_t *q, mblk_t *mp);
1195240Snordmark static void	icmp_input(void *, mblk_t *, void *);
1208348SEric.Yu@Sun.COM static conn_t 	*icmp_open(int family, cred_t *credp, int *err, int flags);
1215240Snordmark static int	icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
1225240Snordmark 		    cred_t *credp);
1235240Snordmark static int	icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
1245240Snordmark 		    cred_t *credp);
1250Sstevel@tonic-gate static int	icmp_unitdata_opt_process(queue_t *q, mblk_t *mp,
1260Sstevel@tonic-gate 		    int *errorp, void *thisdg_attrs);
1270Sstevel@tonic-gate static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
1288348SEric.Yu@Sun.COM int		icmp_opt_set(conn_t *connp, uint_t optset_context,
1290Sstevel@tonic-gate 		    int level, int name, uint_t inlen,
1300Sstevel@tonic-gate 		    uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
1318348SEric.Yu@Sun.COM 		    void *thisdg_attrs, cred_t *cr);
1328348SEric.Yu@Sun.COM int		icmp_opt_get(conn_t *connp, int level, int name,
1330Sstevel@tonic-gate 		    uchar_t *ptr);
1340Sstevel@tonic-gate static int	icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
1353448Sdh155122 static boolean_t icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt);
1360Sstevel@tonic-gate static int	icmp_param_set(queue_t *q, mblk_t *mp, char *value,
1370Sstevel@tonic-gate 		    caddr_t cp, cred_t *cr);
1380Sstevel@tonic-gate static int	icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
1390Sstevel@tonic-gate 		    uchar_t *ptr, int len);
1400Sstevel@tonic-gate static int	icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp,
1410Sstevel@tonic-gate 		    cred_t *cr);
1420Sstevel@tonic-gate static void	icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
1438348SEric.Yu@Sun.COM static void	icmp_tpi_unbind(queue_t *q, mblk_t *mp);
144*8778SErik.Nordmark@Sun.COM static int	icmp_update_label(icmp_t *icmp, mblk_t *mp, ipaddr_t dst);
1450Sstevel@tonic-gate static void	icmp_wput(queue_t *q, mblk_t *mp);
1468348SEric.Yu@Sun.COM static void	icmp_wput_fallback(queue_t *q, mblk_t *mp);
1478348SEric.Yu@Sun.COM static int	raw_ip_send_data_v6(queue_t *q, conn_t *connp, mblk_t *mp,
1488348SEric.Yu@Sun.COM 		    sin6_t *sin6, ip6_pkt_t *ipp);
1498348SEric.Yu@Sun.COM static int	raw_ip_send_data_v4(queue_t *q, conn_t *connp, mblk_t *mp,
1508348SEric.Yu@Sun.COM 		    ipaddr_t v4dst, ip4_pkt_t *pktinfop);
1510Sstevel@tonic-gate static void	icmp_wput_other(queue_t *q, mblk_t *mp);
1520Sstevel@tonic-gate static void	icmp_wput_iocdata(queue_t *q, mblk_t *mp);
1530Sstevel@tonic-gate static void	icmp_wput_restricted(queue_t *q, mblk_t *mp);
1540Sstevel@tonic-gate 
1553448Sdh155122 static void	*rawip_stack_init(netstackid_t stackid, netstack_t *ns);
1563448Sdh155122 static void	rawip_stack_fini(netstackid_t stackid, void *arg);
1573448Sdh155122 
1583448Sdh155122 static void	*rawip_kstat_init(netstackid_t stackid);
1593448Sdh155122 static void	rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp);
1600Sstevel@tonic-gate static int	rawip_kstat_update(kstat_t *kp, int rw);
1618348SEric.Yu@Sun.COM static void	rawip_stack_shutdown(netstackid_t stackid, void *arg);
1628348SEric.Yu@Sun.COM static int	rawip_do_getsockname(icmp_t *icmp, struct sockaddr *sa,
1638348SEric.Yu@Sun.COM 		    uint_t *salenp);
1648348SEric.Yu@Sun.COM static int	rawip_do_getpeername(icmp_t *icmp, struct sockaddr *sa,
1658348SEric.Yu@Sun.COM 		    uint_t *salenp);
1668348SEric.Yu@Sun.COM 
1678348SEric.Yu@Sun.COM int		rawip_getsockname(sock_lower_handle_t, struct sockaddr *,
1688348SEric.Yu@Sun.COM 		    socklen_t *, cred_t *);
1698348SEric.Yu@Sun.COM int		rawip_getpeername(sock_lower_handle_t, struct sockaddr *,
1708348SEric.Yu@Sun.COM 		    socklen_t *, cred_t *);
1710Sstevel@tonic-gate 
1725240Snordmark static struct module_info icmp_mod_info =  {
1730Sstevel@tonic-gate 	5707, "icmp", 1, INFPSZ, 512, 128
1740Sstevel@tonic-gate };
1750Sstevel@tonic-gate 
1765240Snordmark /*
1775240Snordmark  * Entry points for ICMP as a device.
1785240Snordmark  * We have separate open functions for the /dev/icmp and /dev/icmp6 devices.
1795240Snordmark  */
1805240Snordmark static struct qinit icmprinitv4 = {
1815240Snordmark 	NULL, NULL, icmp_openv4, icmp_close, NULL, &icmp_mod_info
1825240Snordmark };
1835240Snordmark 
1845240Snordmark static struct qinit icmprinitv6 = {
1855240Snordmark 	NULL, NULL, icmp_openv6, icmp_close, NULL, &icmp_mod_info
1860Sstevel@tonic-gate };
1870Sstevel@tonic-gate 
1885240Snordmark static struct qinit icmpwinit = {
1898348SEric.Yu@Sun.COM 	(pfi_t)icmp_wput, NULL, NULL, NULL, NULL, &icmp_mod_info
1908348SEric.Yu@Sun.COM };
1918348SEric.Yu@Sun.COM 
1928348SEric.Yu@Sun.COM /* ICMP entry point during fallback */
1938348SEric.Yu@Sun.COM static struct qinit icmp_fallback_sock_winit = {
1948348SEric.Yu@Sun.COM 	(pfi_t)icmp_wput_fallback, NULL, NULL, NULL, NULL, &icmp_mod_info
1950Sstevel@tonic-gate };
1960Sstevel@tonic-gate 
1975240Snordmark /* For AF_INET aka /dev/icmp */
1985240Snordmark struct streamtab icmpinfov4 = {
1995240Snordmark 	&icmprinitv4, &icmpwinit
2005240Snordmark };
2015240Snordmark 
2025240Snordmark /* For AF_INET6 aka /dev/icmp6 */
2035240Snordmark struct streamtab icmpinfov6 = {
2045240Snordmark 	&icmprinitv6, &icmpwinit
2050Sstevel@tonic-gate };
2060Sstevel@tonic-gate 
2070Sstevel@tonic-gate static sin_t	sin_null;	/* Zero address for quick clears */
2080Sstevel@tonic-gate static sin6_t	sin6_null;	/* Zero address for quick clears */
2090Sstevel@tonic-gate 
2100Sstevel@tonic-gate /* Default structure copied into T_INFO_ACK messages */
2110Sstevel@tonic-gate static struct T_info_ack icmp_g_t_info_ack = {
2120Sstevel@tonic-gate 	T_INFO_ACK,
2130Sstevel@tonic-gate 	IP_MAXPACKET,	 /* TSDU_size.  icmp allows maximum size messages. */
2140Sstevel@tonic-gate 	T_INVALID,	/* ETSDU_size.  icmp does not support expedited data. */
2150Sstevel@tonic-gate 	T_INVALID,	/* CDATA_size. icmp does not support connect data. */
2160Sstevel@tonic-gate 	T_INVALID,	/* DDATA_size. icmp does not support disconnect data. */
2170Sstevel@tonic-gate 	0,		/* ADDR_size - filled in later. */
2180Sstevel@tonic-gate 	0,		/* OPT_size - not initialized here */
2190Sstevel@tonic-gate 	IP_MAXPACKET,	/* TIDU_size.  icmp allows maximum size messages. */
2200Sstevel@tonic-gate 	T_CLTS,		/* SERV_type.  icmp supports connection-less. */
2210Sstevel@tonic-gate 	TS_UNBND,	/* CURRENT_state.  This is set from icmp_state. */
2220Sstevel@tonic-gate 	(XPG4_1|SENDZERO) /* PROVIDER_flag */
2230Sstevel@tonic-gate };
2240Sstevel@tonic-gate 
2250Sstevel@tonic-gate /*
2263448Sdh155122  * Table of ND variables supported by icmp.  These are loaded into is_nd
2273448Sdh155122  * when the stack instance is created.
2280Sstevel@tonic-gate  * All of these are alterable, within the min/max values given, at run time.
2290Sstevel@tonic-gate  */
2300Sstevel@tonic-gate static icmpparam_t	icmp_param_arr[] = {
2310Sstevel@tonic-gate 	/* min	max	value	name */
2320Sstevel@tonic-gate 	{ 0,	128,	32,	"icmp_wroff_extra" },
2330Sstevel@tonic-gate 	{ 1,	255,	255,	"icmp_ipv4_ttl" },
2340Sstevel@tonic-gate 	{ 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS,	"icmp_ipv6_hoplimit"},
2350Sstevel@tonic-gate 	{ 0,	1,	1,	"icmp_bsd_compat" },
2360Sstevel@tonic-gate 	{ 4096,	65536,	8192,	"icmp_xmit_hiwat"},
2370Sstevel@tonic-gate 	{ 0,	65536,	1024,	"icmp_xmit_lowat"},
2380Sstevel@tonic-gate 	{ 4096,	65536,	8192,	"icmp_recv_hiwat"},
2390Sstevel@tonic-gate 	{ 65536, 1024*1024*1024, 256*1024,	"icmp_max_buf"},
2400Sstevel@tonic-gate };
2413448Sdh155122 #define	is_wroff_extra			is_param_arr[0].icmp_param_value
2423448Sdh155122 #define	is_ipv4_ttl			is_param_arr[1].icmp_param_value
2433448Sdh155122 #define	is_ipv6_hoplimit		is_param_arr[2].icmp_param_value
2443448Sdh155122 #define	is_bsd_compat			is_param_arr[3].icmp_param_value
2453448Sdh155122 #define	is_xmit_hiwat			is_param_arr[4].icmp_param_value
2463448Sdh155122 #define	is_xmit_lowat			is_param_arr[5].icmp_param_value
2473448Sdh155122 #define	is_recv_hiwat			is_param_arr[6].icmp_param_value
2483448Sdh155122 #define	is_max_buf			is_param_arr[7].icmp_param_value
2490Sstevel@tonic-gate 
2508348SEric.Yu@Sun.COM static int rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len);
2518348SEric.Yu@Sun.COM static int rawip_do_connect(conn_t *connp, const struct sockaddr *sa,
252*8778SErik.Nordmark@Sun.COM     socklen_t len, cred_t *cr);
2538348SEric.Yu@Sun.COM static void rawip_post_ip_bind_connect(icmp_t *icmp, mblk_t *ire_mp, int error);
2548348SEric.Yu@Sun.COM 
2550Sstevel@tonic-gate /*
2560Sstevel@tonic-gate  * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
2570Sstevel@tonic-gate  * passed to icmp_wput.
2580Sstevel@tonic-gate  * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the ICMP
2590Sstevel@tonic-gate  * protocol type placed in the message following the address. A T_BIND_ACK
2605240Snordmark  * message is returned by ip_bind_v4/v6.
2610Sstevel@tonic-gate  */
2620Sstevel@tonic-gate static void
2638348SEric.Yu@Sun.COM icmp_tpi_bind(queue_t *q, mblk_t *mp)
2640Sstevel@tonic-gate {
2658348SEric.Yu@Sun.COM 	int	error;
2668348SEric.Yu@Sun.COM 	struct sockaddr *sa;
2678348SEric.Yu@Sun.COM 	struct T_bind_req *tbr;
2688348SEric.Yu@Sun.COM 	socklen_t	len;
2690Sstevel@tonic-gate 	sin_t	*sin;
2700Sstevel@tonic-gate 	sin6_t	*sin6;
2718348SEric.Yu@Sun.COM 	icmp_t		*icmp;
2725240Snordmark 	conn_t	*connp = Q_TO_CONN(q);
2738348SEric.Yu@Sun.COM 	mblk_t *mp1;
274*8778SErik.Nordmark@Sun.COM 	cred_t *cr;
275*8778SErik.Nordmark@Sun.COM 
276*8778SErik.Nordmark@Sun.COM 	/*
277*8778SErik.Nordmark@Sun.COM 	 * All Solaris components should pass a db_credp
278*8778SErik.Nordmark@Sun.COM 	 * for this TPI message, hence we ASSERT.
279*8778SErik.Nordmark@Sun.COM 	 * But in case there is some other M_PROTO that looks
280*8778SErik.Nordmark@Sun.COM 	 * like a TPI message sent by some other kernel
281*8778SErik.Nordmark@Sun.COM 	 * component, we check and return an error.
282*8778SErik.Nordmark@Sun.COM 	 */
283*8778SErik.Nordmark@Sun.COM 	cr = msg_getcred(mp, NULL);
284*8778SErik.Nordmark@Sun.COM 	ASSERT(cr != NULL);
285*8778SErik.Nordmark@Sun.COM 	if (cr == NULL) {
286*8778SErik.Nordmark@Sun.COM 		icmp_err_ack(q, mp, TSYSERR, EINVAL);
287*8778SErik.Nordmark@Sun.COM 		return;
288*8778SErik.Nordmark@Sun.COM 	}
2895240Snordmark 
2905240Snordmark 	icmp = connp->conn_icmp;
2910Sstevel@tonic-gate 	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
2920Sstevel@tonic-gate 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
2930Sstevel@tonic-gate 		    "icmp_bind: bad req, len %u",
2940Sstevel@tonic-gate 		    (uint_t)(mp->b_wptr - mp->b_rptr));
2950Sstevel@tonic-gate 		icmp_err_ack(q, mp, TPROTO, 0);
2960Sstevel@tonic-gate 		return;
2970Sstevel@tonic-gate 	}
2988348SEric.Yu@Sun.COM 
2990Sstevel@tonic-gate 	if (icmp->icmp_state != TS_UNBND) {
3000Sstevel@tonic-gate 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
3010Sstevel@tonic-gate 		    "icmp_bind: bad state, %d", icmp->icmp_state);
3020Sstevel@tonic-gate 		icmp_err_ack(q, mp, TOUTSTATE, 0);
3030Sstevel@tonic-gate 		return;
3040Sstevel@tonic-gate 	}
3058348SEric.Yu@Sun.COM 
3060Sstevel@tonic-gate 	/*
3070Sstevel@tonic-gate 	 * Reallocate the message to make sure we have enough room for an
3080Sstevel@tonic-gate 	 * address and the protocol type.
3090Sstevel@tonic-gate 	 */
3100Sstevel@tonic-gate 	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1);
3110Sstevel@tonic-gate 	if (!mp1) {
3120Sstevel@tonic-gate 		icmp_err_ack(q, mp, TSYSERR, ENOMEM);
3130Sstevel@tonic-gate 		return;
3140Sstevel@tonic-gate 	}
3150Sstevel@tonic-gate 	mp = mp1;
3168348SEric.Yu@Sun.COM 
3178348SEric.Yu@Sun.COM 	/* Reset the message type in preparation for shipping it back. */
3188348SEric.Yu@Sun.COM 	DB_TYPE(mp) = M_PCPROTO;
3190Sstevel@tonic-gate 	tbr = (struct T_bind_req *)mp->b_rptr;
3208348SEric.Yu@Sun.COM 	len = tbr->ADDR_length;
3218348SEric.Yu@Sun.COM 	switch (len) {
3228348SEric.Yu@Sun.COM 	case 0:	/* request for a generic port */
3230Sstevel@tonic-gate 		tbr->ADDR_offset = sizeof (struct T_bind_req);
3240Sstevel@tonic-gate 		if (icmp->icmp_family == AF_INET) {
3250Sstevel@tonic-gate 			tbr->ADDR_length = sizeof (sin_t);
3260Sstevel@tonic-gate 			sin = (sin_t *)&tbr[1];
3270Sstevel@tonic-gate 			*sin = sin_null;
3280Sstevel@tonic-gate 			sin->sin_family = AF_INET;
3290Sstevel@tonic-gate 			mp->b_wptr = (uchar_t *)&sin[1];
3308348SEric.Yu@Sun.COM 			sa = (struct sockaddr *)sin;
3318348SEric.Yu@Sun.COM 			len = sizeof (sin_t);
3320Sstevel@tonic-gate 		} else {
3330Sstevel@tonic-gate 			ASSERT(icmp->icmp_family == AF_INET6);
3340Sstevel@tonic-gate 			tbr->ADDR_length = sizeof (sin6_t);
3350Sstevel@tonic-gate 			sin6 = (sin6_t *)&tbr[1];
3360Sstevel@tonic-gate 			*sin6 = sin6_null;
3370Sstevel@tonic-gate 			sin6->sin6_family = AF_INET6;
3380Sstevel@tonic-gate 			mp->b_wptr = (uchar_t *)&sin6[1];
3398348SEric.Yu@Sun.COM 			sa = (struct sockaddr *)sin6;
3408348SEric.Yu@Sun.COM 			len = sizeof (sin6_t);
3410Sstevel@tonic-gate 		}
3420Sstevel@tonic-gate 		break;
3438348SEric.Yu@Sun.COM 
3448348SEric.Yu@Sun.COM 	case sizeof (sin_t):	/* Complete IPv4 address */
3458348SEric.Yu@Sun.COM 		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
3468348SEric.Yu@Sun.COM 		    sizeof (sin_t));
3470Sstevel@tonic-gate 		break;
3488348SEric.Yu@Sun.COM 
3498348SEric.Yu@Sun.COM 	case sizeof (sin6_t):	/* Complete IPv6 address */
3508348SEric.Yu@Sun.COM 		sa = (struct sockaddr *)mi_offset_param(mp,
3518348SEric.Yu@Sun.COM 		    tbr->ADDR_offset, sizeof (sin6_t));
3528348SEric.Yu@Sun.COM 		break;
3538348SEric.Yu@Sun.COM 
3540Sstevel@tonic-gate 	default:
3550Sstevel@tonic-gate 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
3560Sstevel@tonic-gate 		    "icmp_bind: bad ADDR_length %d", tbr->ADDR_length);
3570Sstevel@tonic-gate 		icmp_err_ack(q, mp, TBADADDR, 0);
3580Sstevel@tonic-gate 		return;
3590Sstevel@tonic-gate 	}
3605240Snordmark 
3618348SEric.Yu@Sun.COM 	error = rawip_do_bind(connp, sa, len);
3628348SEric.Yu@Sun.COM done:
3638348SEric.Yu@Sun.COM 	ASSERT(mp->b_cont == NULL);
3648348SEric.Yu@Sun.COM 	if (error != 0) {
3658348SEric.Yu@Sun.COM 		if (error > 0) {
3668348SEric.Yu@Sun.COM 			icmp_err_ack(q, mp, TSYSERR, error);
3678348SEric.Yu@Sun.COM 		} else {
3688348SEric.Yu@Sun.COM 			icmp_err_ack(q, mp, -error, 0);
3698348SEric.Yu@Sun.COM 		}
3708348SEric.Yu@Sun.COM 	} else {
3718348SEric.Yu@Sun.COM 		tbr->PRIM_type = T_BIND_ACK;
3728348SEric.Yu@Sun.COM 		qreply(q, mp);
3738348SEric.Yu@Sun.COM 	}
3748348SEric.Yu@Sun.COM }
3758348SEric.Yu@Sun.COM 
3768348SEric.Yu@Sun.COM static int
3778348SEric.Yu@Sun.COM rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len)
3788348SEric.Yu@Sun.COM {
3798348SEric.Yu@Sun.COM 	sin_t		*sin;
3808348SEric.Yu@Sun.COM 	sin6_t		*sin6;
3818348SEric.Yu@Sun.COM 	icmp_t		*icmp;
3828348SEric.Yu@Sun.COM 	int		error = 0;
3838348SEric.Yu@Sun.COM 	mblk_t		*ire_mp;
3848348SEric.Yu@Sun.COM 
3858348SEric.Yu@Sun.COM 
3868348SEric.Yu@Sun.COM 	icmp = connp->conn_icmp;
3878348SEric.Yu@Sun.COM 
3888348SEric.Yu@Sun.COM 	if (sa == NULL || !OK_32PTR((char *)sa)) {
3898348SEric.Yu@Sun.COM 		return (EINVAL);
3908348SEric.Yu@Sun.COM 	}
3918348SEric.Yu@Sun.COM 
3925240Snordmark 	/*
3935240Snordmark 	 * The state must be TS_UNBND. TPI mandates that users must send
3945240Snordmark 	 * TPI primitives only 1 at a time and wait for the response before
3955240Snordmark 	 * sending the next primitive.
3965240Snordmark 	 */
3975240Snordmark 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
3985240Snordmark 	if (icmp->icmp_state != TS_UNBND || icmp->icmp_pending_op != -1) {
3998348SEric.Yu@Sun.COM 		error = -TOUTSTATE;
4008348SEric.Yu@Sun.COM 		goto done;
4018348SEric.Yu@Sun.COM 	}
4028348SEric.Yu@Sun.COM 
4038348SEric.Yu@Sun.COM 	ASSERT(len != 0);
4048348SEric.Yu@Sun.COM 	switch (len) {
4058348SEric.Yu@Sun.COM 	case sizeof (sin_t):    /* Complete IPv4 address */
4068348SEric.Yu@Sun.COM 		sin = (sin_t *)sa;
4078348SEric.Yu@Sun.COM 		if (sin->sin_family != AF_INET ||
4088348SEric.Yu@Sun.COM 		    icmp->icmp_family != AF_INET) {
4098348SEric.Yu@Sun.COM 			/* TSYSERR, EAFNOSUPPORT */
4108348SEric.Yu@Sun.COM 			error = EAFNOSUPPORT;
4118348SEric.Yu@Sun.COM 			goto done;
4128348SEric.Yu@Sun.COM 		}
4138348SEric.Yu@Sun.COM 		break;
4148348SEric.Yu@Sun.COM 	case sizeof (sin6_t): /* Complete IPv6 address */
4158348SEric.Yu@Sun.COM 		sin6 = (sin6_t *)sa;
4168348SEric.Yu@Sun.COM 		if (sin6->sin6_family != AF_INET6 ||
4178348SEric.Yu@Sun.COM 		    icmp->icmp_family != AF_INET6) {
4188348SEric.Yu@Sun.COM 			/* TSYSERR, EAFNOSUPPORT */
4198348SEric.Yu@Sun.COM 			error = EAFNOSUPPORT;
4208348SEric.Yu@Sun.COM 			goto done;
4218348SEric.Yu@Sun.COM 		}
4228348SEric.Yu@Sun.COM 		/* No support for mapped addresses on raw sockets */
4238348SEric.Yu@Sun.COM 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
4248348SEric.Yu@Sun.COM 			/* TSYSERR, EADDRNOTAVAIL */
4258348SEric.Yu@Sun.COM 			error = EADDRNOTAVAIL;
4268348SEric.Yu@Sun.COM 			goto done;
4278348SEric.Yu@Sun.COM 		}
4288348SEric.Yu@Sun.COM 		break;
4298348SEric.Yu@Sun.COM 
4308348SEric.Yu@Sun.COM 	default:
4318348SEric.Yu@Sun.COM 		/* TBADADDR */
4328348SEric.Yu@Sun.COM 		error = EADDRNOTAVAIL;
4338348SEric.Yu@Sun.COM 		goto done;
4348348SEric.Yu@Sun.COM 	}
4358348SEric.Yu@Sun.COM 
4368348SEric.Yu@Sun.COM 	icmp->icmp_pending_op = T_BIND_REQ;
4378348SEric.Yu@Sun.COM 	icmp->icmp_state = TS_IDLE;
4385240Snordmark 
4390Sstevel@tonic-gate 	/*
4400Sstevel@tonic-gate 	 * Copy the source address into our icmp structure.  This address
4410Sstevel@tonic-gate 	 * may still be zero; if so, ip will fill in the correct address
4420Sstevel@tonic-gate 	 * each time an outbound packet is passed to it.
4435240Snordmark 	 * If we are binding to a broadcast or multicast address then
4448348SEric.Yu@Sun.COM 	 * rawip_post_ip_bind_connect will clear the source address.
4450Sstevel@tonic-gate 	 */
4460Sstevel@tonic-gate 
4470Sstevel@tonic-gate 	if (icmp->icmp_family == AF_INET) {
4480Sstevel@tonic-gate 		ASSERT(sin != NULL);
4490Sstevel@tonic-gate 		ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
4500Sstevel@tonic-gate 		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr,
4510Sstevel@tonic-gate 		    &icmp->icmp_v6src);
4520Sstevel@tonic-gate 		icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
4530Sstevel@tonic-gate 		    icmp->icmp_ip_snd_options_len;
4540Sstevel@tonic-gate 		icmp->icmp_bound_v6src = icmp->icmp_v6src;
4550Sstevel@tonic-gate 	} else {
4560Sstevel@tonic-gate 		int error;
4570Sstevel@tonic-gate 
4580Sstevel@tonic-gate 		ASSERT(sin6 != NULL);
4590Sstevel@tonic-gate 		ASSERT(icmp->icmp_ipversion == IPV6_VERSION);
4600Sstevel@tonic-gate 		icmp->icmp_v6src = sin6->sin6_addr;
4610Sstevel@tonic-gate 		icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len;
4620Sstevel@tonic-gate 		icmp->icmp_bound_v6src = icmp->icmp_v6src;
4630Sstevel@tonic-gate 
4640Sstevel@tonic-gate 		/* Rebuild the header template */
4655240Snordmark 		error = icmp_build_hdrs(icmp);
4660Sstevel@tonic-gate 		if (error != 0) {
4675240Snordmark 			icmp->icmp_pending_op = -1;
4688348SEric.Yu@Sun.COM 			/*
4698348SEric.Yu@Sun.COM 			 * TSYSERR
4708348SEric.Yu@Sun.COM 			 */
4718348SEric.Yu@Sun.COM 			goto done;
4720Sstevel@tonic-gate 		}
4730Sstevel@tonic-gate 	}
4748348SEric.Yu@Sun.COM 
4758348SEric.Yu@Sun.COM 	ire_mp = NULL;
4760Sstevel@tonic-gate 	if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) {
4770Sstevel@tonic-gate 		/*
4788348SEric.Yu@Sun.COM 		 * request an IRE if src not 0 (INADDR_ANY)
4790Sstevel@tonic-gate 		 */
4808348SEric.Yu@Sun.COM 		ire_mp = allocb(sizeof (ire_t), BPRI_HI);
4818348SEric.Yu@Sun.COM 		if (ire_mp == NULL) {
4825240Snordmark 			icmp->icmp_pending_op = -1;
4838348SEric.Yu@Sun.COM 			error = ENOMEM;
4848348SEric.Yu@Sun.COM 			goto done;
4850Sstevel@tonic-gate 		}
4868348SEric.Yu@Sun.COM 		DB_TYPE(ire_mp) = IRE_DB_REQ_TYPE;
4878348SEric.Yu@Sun.COM 	}
4888348SEric.Yu@Sun.COM done:
4898348SEric.Yu@Sun.COM 	rw_exit(&icmp->icmp_rwlock);
4908348SEric.Yu@Sun.COM 	if (error != 0)
4918348SEric.Yu@Sun.COM 		return (error);
4928348SEric.Yu@Sun.COM 
4938348SEric.Yu@Sun.COM 	if (icmp->icmp_family == AF_INET6) {
4948348SEric.Yu@Sun.COM 		error = ip_proto_bind_laddr_v6(connp, &ire_mp, icmp->icmp_proto,
4958348SEric.Yu@Sun.COM 		    &sin6->sin6_addr, sin6->sin6_port, B_TRUE);
4968348SEric.Yu@Sun.COM 	} else {
4978348SEric.Yu@Sun.COM 		error = ip_proto_bind_laddr_v4(connp, &ire_mp, icmp->icmp_proto,
4988348SEric.Yu@Sun.COM 		    sin->sin_addr.s_addr, sin->sin_port, B_TRUE);
4998348SEric.Yu@Sun.COM 	}
5008348SEric.Yu@Sun.COM 	rawip_post_ip_bind_connect(icmp, ire_mp, error);
5018348SEric.Yu@Sun.COM 	return (error);
5028348SEric.Yu@Sun.COM }
5038348SEric.Yu@Sun.COM 
5048348SEric.Yu@Sun.COM static void
5058348SEric.Yu@Sun.COM rawip_post_ip_bind_connect(icmp_t *icmp, mblk_t *ire_mp, int error)
5068348SEric.Yu@Sun.COM {
5078348SEric.Yu@Sun.COM 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
5088348SEric.Yu@Sun.COM 	if (icmp->icmp_state == TS_UNBND) {
5098348SEric.Yu@Sun.COM 		/*
5108348SEric.Yu@Sun.COM 		 * not yet bound - bind sent by icmp_bind_proto.
5118348SEric.Yu@Sun.COM 		 */
5128348SEric.Yu@Sun.COM 		rw_exit(&icmp->icmp_rwlock);
5138348SEric.Yu@Sun.COM 		return;
5148348SEric.Yu@Sun.COM 	}
5158348SEric.Yu@Sun.COM 	ASSERT(icmp->icmp_pending_op != -1);
5168348SEric.Yu@Sun.COM 	icmp->icmp_pending_op = -1;
5178348SEric.Yu@Sun.COM 
5188348SEric.Yu@Sun.COM 	if (error != 0) {
5198348SEric.Yu@Sun.COM 		if (icmp->icmp_state == TS_DATA_XFER) {
5208348SEric.Yu@Sun.COM 			/* Connect failed */
5218348SEric.Yu@Sun.COM 			/* Revert back to the bound source */
5228348SEric.Yu@Sun.COM 			icmp->icmp_v6src = icmp->icmp_bound_v6src;
5238348SEric.Yu@Sun.COM 			icmp->icmp_state = TS_IDLE;
5248348SEric.Yu@Sun.COM 			if (icmp->icmp_family == AF_INET6)
5258348SEric.Yu@Sun.COM 				(void) icmp_build_hdrs(icmp);
5268348SEric.Yu@Sun.COM 		} else {
5278348SEric.Yu@Sun.COM 			V6_SET_ZERO(icmp->icmp_v6src);
5288348SEric.Yu@Sun.COM 			V6_SET_ZERO(icmp->icmp_bound_v6src);
5298348SEric.Yu@Sun.COM 			icmp->icmp_state = TS_UNBND;
5308348SEric.Yu@Sun.COM 			if (icmp->icmp_family == AF_INET6)
5318348SEric.Yu@Sun.COM 				(void) icmp_build_hdrs(icmp);
5328348SEric.Yu@Sun.COM 		}
5338348SEric.Yu@Sun.COM 	} else {
5348348SEric.Yu@Sun.COM 		if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) {
5358348SEric.Yu@Sun.COM 			ire_t *ire;
5368348SEric.Yu@Sun.COM 
5378348SEric.Yu@Sun.COM 			ire = (ire_t *)ire_mp->b_rptr;
5388348SEric.Yu@Sun.COM 			/*
5398348SEric.Yu@Sun.COM 			 * If a broadcast/multicast address was bound set
5408348SEric.Yu@Sun.COM 			 * the source address to 0.
5418348SEric.Yu@Sun.COM 			 * This ensures no datagrams with broadcast address
5428348SEric.Yu@Sun.COM 			 * as source address are emitted (which would violate
5438348SEric.Yu@Sun.COM 			 * RFC1122 - Hosts requirements)
5448348SEric.Yu@Sun.COM 			 * Note: we get IRE_BROADCAST for IPv6
5458348SEric.Yu@Sun.COM 			 * to "mark" a multicast local address.
5468348SEric.Yu@Sun.COM 			 */
5478348SEric.Yu@Sun.COM 
5488348SEric.Yu@Sun.COM 
5498348SEric.Yu@Sun.COM 			if (ire->ire_type == IRE_BROADCAST &&
5508348SEric.Yu@Sun.COM 			    icmp->icmp_state != TS_DATA_XFER) {
5518348SEric.Yu@Sun.COM 				/*
5528348SEric.Yu@Sun.COM 				 * This was just a local bind to a
5538348SEric.Yu@Sun.COM 				 * MC/broadcast addr
5548348SEric.Yu@Sun.COM 				 */
5558348SEric.Yu@Sun.COM 				V6_SET_ZERO(icmp->icmp_v6src);
5568348SEric.Yu@Sun.COM 				if (icmp->icmp_family == AF_INET6)
5578348SEric.Yu@Sun.COM 					(void) icmp_build_hdrs(icmp);
5588348SEric.Yu@Sun.COM 			}
5598348SEric.Yu@Sun.COM 		}
5608348SEric.Yu@Sun.COM 
5610Sstevel@tonic-gate 	}
5625240Snordmark 	rw_exit(&icmp->icmp_rwlock);
5638348SEric.Yu@Sun.COM 	if (ire_mp != NULL)
5648348SEric.Yu@Sun.COM 		freeb(ire_mp);
5650Sstevel@tonic-gate }
5660Sstevel@tonic-gate 
5670Sstevel@tonic-gate /*
5680Sstevel@tonic-gate  * Send message to IP to just bind to the protocol.
5690Sstevel@tonic-gate  */
5708348SEric.Yu@Sun.COM static int
5718348SEric.Yu@Sun.COM icmp_bind_proto(conn_t *connp)
5720Sstevel@tonic-gate {
5730Sstevel@tonic-gate 	icmp_t	*icmp;
5748348SEric.Yu@Sun.COM 	int	error;
5755240Snordmark 
5765240Snordmark 	icmp = connp->conn_icmp;
5775240Snordmark 
5785240Snordmark 	if (icmp->icmp_family == AF_INET6)
5798348SEric.Yu@Sun.COM 		error = ip_proto_bind_laddr_v6(connp, NULL, icmp->icmp_proto,
5808348SEric.Yu@Sun.COM 		    &sin6_null.sin6_addr, 0, B_TRUE);
5815240Snordmark 	else
5828348SEric.Yu@Sun.COM 		error = ip_proto_bind_laddr_v4(connp, NULL, icmp->icmp_proto,
5838348SEric.Yu@Sun.COM 		    sin_null.sin_addr.s_addr, 0, B_TRUE);
5848348SEric.Yu@Sun.COM 
5858348SEric.Yu@Sun.COM 	rawip_post_ip_bind_connect(icmp, NULL, error);
5868348SEric.Yu@Sun.COM 	return (error);
5870Sstevel@tonic-gate }
5880Sstevel@tonic-gate 
5890Sstevel@tonic-gate static void
5908348SEric.Yu@Sun.COM icmp_tpi_connect(queue_t *q, mblk_t *mp)
5910Sstevel@tonic-gate {
5928348SEric.Yu@Sun.COM 	conn_t	*connp = Q_TO_CONN(q);
5930Sstevel@tonic-gate 	struct T_conn_req	*tcr;
5940Sstevel@tonic-gate 	icmp_t	*icmp;
5958348SEric.Yu@Sun.COM 	struct sockaddr *sa;
5968348SEric.Yu@Sun.COM 	socklen_t len;
5978348SEric.Yu@Sun.COM 	int error;
598*8778SErik.Nordmark@Sun.COM 	cred_t *cr;
599*8778SErik.Nordmark@Sun.COM 
600*8778SErik.Nordmark@Sun.COM 	/*
601*8778SErik.Nordmark@Sun.COM 	 * All Solaris components should pass a db_credp
602*8778SErik.Nordmark@Sun.COM 	 * for this TPI message, hence we ASSERT.
603*8778SErik.Nordmark@Sun.COM 	 * But in case there is some other M_PROTO that looks
604*8778SErik.Nordmark@Sun.COM 	 * like a TPI message sent by some other kernel
605*8778SErik.Nordmark@Sun.COM 	 * component, we check and return an error.
606*8778SErik.Nordmark@Sun.COM 	 */
607*8778SErik.Nordmark@Sun.COM 	cr = msg_getcred(mp, NULL);
608*8778SErik.Nordmark@Sun.COM 	ASSERT(cr != NULL);
609*8778SErik.Nordmark@Sun.COM 	if (cr == NULL) {
610*8778SErik.Nordmark@Sun.COM 		icmp_err_ack(q, mp, TSYSERR, EINVAL);
611*8778SErik.Nordmark@Sun.COM 		return;
612*8778SErik.Nordmark@Sun.COM 	}
6135240Snordmark 
6145240Snordmark 	icmp = connp->conn_icmp;
6150Sstevel@tonic-gate 	tcr = (struct T_conn_req *)mp->b_rptr;
6160Sstevel@tonic-gate 	/* Sanity checks */
6175240Snordmark 	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
6180Sstevel@tonic-gate 		icmp_err_ack(q, mp, TPROTO, 0);
6190Sstevel@tonic-gate 		return;
6200Sstevel@tonic-gate 	}
6210Sstevel@tonic-gate 
6220Sstevel@tonic-gate 	if (tcr->OPT_length != 0) {
6230Sstevel@tonic-gate 		icmp_err_ack(q, mp, TBADOPT, 0);
6240Sstevel@tonic-gate 		return;
6250Sstevel@tonic-gate 	}
6265240Snordmark 
6278348SEric.Yu@Sun.COM 	len = tcr->DEST_length;
6288348SEric.Yu@Sun.COM 
6298348SEric.Yu@Sun.COM 	switch (len) {
6300Sstevel@tonic-gate 	default:
6310Sstevel@tonic-gate 		icmp_err_ack(q, mp, TBADADDR, 0);
6320Sstevel@tonic-gate 		return;
6330Sstevel@tonic-gate 	case sizeof (sin_t):
6348348SEric.Yu@Sun.COM 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
6350Sstevel@tonic-gate 		    sizeof (sin_t));
6368348SEric.Yu@Sun.COM 		break;
6378348SEric.Yu@Sun.COM 	case sizeof (sin6_t):
6388348SEric.Yu@Sun.COM 		sa = (struct sockaddr *)mi_offset_param(mp,
6398348SEric.Yu@Sun.COM 		    tcr->DEST_offset, sizeof (sin6_t));
6408348SEric.Yu@Sun.COM 		break;
6418348SEric.Yu@Sun.COM 	}
6428348SEric.Yu@Sun.COM 
6438348SEric.Yu@Sun.COM 	error = proto_verify_ip_addr(icmp->icmp_family, sa, len);
6448348SEric.Yu@Sun.COM 	if (error != 0) {
6458348SEric.Yu@Sun.COM 		icmp_err_ack(q, mp, TSYSERR, error);
6468348SEric.Yu@Sun.COM 		return;
6478348SEric.Yu@Sun.COM 	}
6488348SEric.Yu@Sun.COM 
649*8778SErik.Nordmark@Sun.COM 	error = rawip_do_connect(connp, sa, len, cr);
6508348SEric.Yu@Sun.COM 	if (error != 0) {
6518348SEric.Yu@Sun.COM 		if (error < 0) {
6528348SEric.Yu@Sun.COM 			icmp_err_ack(q, mp, -error, 0);
6538348SEric.Yu@Sun.COM 		} else {
6548348SEric.Yu@Sun.COM 			icmp_err_ack(q, mp, 0, error);
6550Sstevel@tonic-gate 		}
6568348SEric.Yu@Sun.COM 	} else {
6578348SEric.Yu@Sun.COM 		mblk_t *mp1;
6588348SEric.Yu@Sun.COM 
6598348SEric.Yu@Sun.COM 		/*
6608348SEric.Yu@Sun.COM 		 * We have to send a connection confirmation to
6618348SEric.Yu@Sun.COM 		 * keep TLI happy.
6628348SEric.Yu@Sun.COM 		 */
6638348SEric.Yu@Sun.COM 		if (icmp->icmp_family == AF_INET) {
6648348SEric.Yu@Sun.COM 			mp1 = mi_tpi_conn_con(NULL, (char *)sa,
6658348SEric.Yu@Sun.COM 			    sizeof (sin_t), NULL, 0);
6668348SEric.Yu@Sun.COM 		} else {
6678348SEric.Yu@Sun.COM 			ASSERT(icmp->icmp_family == AF_INET6);
6688348SEric.Yu@Sun.COM 			mp1 = mi_tpi_conn_con(NULL, (char *)sa,
6698348SEric.Yu@Sun.COM 			    sizeof (sin6_t), NULL, 0);
6708348SEric.Yu@Sun.COM 		}
6718348SEric.Yu@Sun.COM 		if (mp1 == NULL) {
6728348SEric.Yu@Sun.COM 			rw_exit(&icmp->icmp_rwlock);
6738348SEric.Yu@Sun.COM 			icmp_err_ack(q, mp, TSYSERR, ENOMEM);
6740Sstevel@tonic-gate 			return;
6750Sstevel@tonic-gate 		}
6768348SEric.Yu@Sun.COM 
6778348SEric.Yu@Sun.COM 		/*
6788348SEric.Yu@Sun.COM 		 * Send ok_ack for T_CONN_REQ
6798348SEric.Yu@Sun.COM 		 */
6808348SEric.Yu@Sun.COM 		mp = mi_tpi_ok_ack_alloc(mp);
6818348SEric.Yu@Sun.COM 		if (mp == NULL) {
6828348SEric.Yu@Sun.COM 			/* Unable to reuse the T_CONN_REQ for the ack. */
6838348SEric.Yu@Sun.COM 			freemsg(mp1);
6848348SEric.Yu@Sun.COM 			icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
6850Sstevel@tonic-gate 			return;
6860Sstevel@tonic-gate 		}
6878348SEric.Yu@Sun.COM 		putnext(connp->conn_rq, mp);
6888348SEric.Yu@Sun.COM 		putnext(connp->conn_rq, mp1);
6898348SEric.Yu@Sun.COM 	}
6908348SEric.Yu@Sun.COM }
6918348SEric.Yu@Sun.COM 
6928348SEric.Yu@Sun.COM static int
693*8778SErik.Nordmark@Sun.COM rawip_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
694*8778SErik.Nordmark@Sun.COM     cred_t *cr)
6958348SEric.Yu@Sun.COM {
6968348SEric.Yu@Sun.COM 	icmp_t	*icmp;
6978348SEric.Yu@Sun.COM 	sin_t	*sin;
6988348SEric.Yu@Sun.COM 	sin6_t	*sin6;
6998348SEric.Yu@Sun.COM 	mblk_t  *ire_mp;
7008348SEric.Yu@Sun.COM 	int	error;
7018348SEric.Yu@Sun.COM 	ipaddr_t	v4dst;
7028348SEric.Yu@Sun.COM 	in6_addr_t	v6dst;
7038348SEric.Yu@Sun.COM 
7048348SEric.Yu@Sun.COM 	icmp = connp->conn_icmp;
7058348SEric.Yu@Sun.COM 
7068348SEric.Yu@Sun.COM 	if (sa == NULL || !OK_32PTR((char *)sa)) {
7078348SEric.Yu@Sun.COM 		return (EINVAL);
7088348SEric.Yu@Sun.COM 	}
7098348SEric.Yu@Sun.COM 
7108348SEric.Yu@Sun.COM 	ire_mp = allocb(sizeof (ire_t), BPRI_HI);
7118348SEric.Yu@Sun.COM 	if (ire_mp == NULL)
7128348SEric.Yu@Sun.COM 		return (ENOMEM);
7138348SEric.Yu@Sun.COM 	DB_TYPE(ire_mp) = IRE_DB_REQ_TYPE;
7148348SEric.Yu@Sun.COM 
7158348SEric.Yu@Sun.COM 
7168348SEric.Yu@Sun.COM 	ASSERT(sa != NULL && len != 0);
7178348SEric.Yu@Sun.COM 
7188348SEric.Yu@Sun.COM 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
7198348SEric.Yu@Sun.COM 	if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) {
7208348SEric.Yu@Sun.COM 		rw_exit(&icmp->icmp_rwlock);
7218348SEric.Yu@Sun.COM 		freeb(ire_mp);
7228348SEric.Yu@Sun.COM 		return (-TOUTSTATE);
7238348SEric.Yu@Sun.COM 	}
7248348SEric.Yu@Sun.COM 
7258348SEric.Yu@Sun.COM 	switch (len) {
7268348SEric.Yu@Sun.COM 	case sizeof (sin_t):
7278348SEric.Yu@Sun.COM 		sin = (sin_t *)sa;
7288348SEric.Yu@Sun.COM 
7298348SEric.Yu@Sun.COM 		ASSERT(icmp->icmp_family == AF_INET);
7308348SEric.Yu@Sun.COM 		ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
7318348SEric.Yu@Sun.COM 
7328348SEric.Yu@Sun.COM 		v4dst = sin->sin_addr.s_addr;
7330Sstevel@tonic-gate 		/*
7340Sstevel@tonic-gate 		 * Interpret a zero destination to mean loopback.
7350Sstevel@tonic-gate 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
7360Sstevel@tonic-gate 		 * generate the T_CONN_CON.
7370Sstevel@tonic-gate 		 */
7380Sstevel@tonic-gate 		if (v4dst == INADDR_ANY) {
7390Sstevel@tonic-gate 			v4dst = htonl(INADDR_LOOPBACK);
7400Sstevel@tonic-gate 		}
7418348SEric.Yu@Sun.COM 
7428348SEric.Yu@Sun.COM 		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
7438348SEric.Yu@Sun.COM 		ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
7448348SEric.Yu@Sun.COM 		icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
7458348SEric.Yu@Sun.COM 		    icmp->icmp_ip_snd_options_len;
7468348SEric.Yu@Sun.COM 		icmp->icmp_v6dst.sin6_addr = v6dst;
7478348SEric.Yu@Sun.COM 		icmp->icmp_v6dst.sin6_family = AF_INET6;
7488348SEric.Yu@Sun.COM 		icmp->icmp_v6dst.sin6_flowinfo = 0;
7498348SEric.Yu@Sun.COM 		icmp->icmp_v6dst.sin6_port = 0;
7500Sstevel@tonic-gate 
7510Sstevel@tonic-gate 		/*
7520Sstevel@tonic-gate 		 * If the destination address is multicast and
7530Sstevel@tonic-gate 		 * an outgoing multicast interface has been set,
7540Sstevel@tonic-gate 		 * use the address of that interface as our
7550Sstevel@tonic-gate 		 * source address if no source address has been set.
7560Sstevel@tonic-gate 		 */
7570Sstevel@tonic-gate 		if (V4_PART_OF_V6(icmp->icmp_v6src) == INADDR_ANY &&
7580Sstevel@tonic-gate 		    CLASSD(v4dst) &&
7590Sstevel@tonic-gate 		    icmp->icmp_multicast_if_addr != INADDR_ANY) {
7600Sstevel@tonic-gate 			IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr,
7610Sstevel@tonic-gate 			    &icmp->icmp_v6src);
7620Sstevel@tonic-gate 		}
7638348SEric.Yu@Sun.COM 		break;
7648348SEric.Yu@Sun.COM 	case sizeof (sin6_t):
7658348SEric.Yu@Sun.COM 		sin6 = (sin6_t *)sa;
7668348SEric.Yu@Sun.COM 
7678348SEric.Yu@Sun.COM 		/* No support for mapped addresses on raw sockets */
7688348SEric.Yu@Sun.COM 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
7698348SEric.Yu@Sun.COM 			rw_exit(&icmp->icmp_rwlock);
7708348SEric.Yu@Sun.COM 			freeb(ire_mp);
7718348SEric.Yu@Sun.COM 			return (EADDRNOTAVAIL);
7728348SEric.Yu@Sun.COM 		}
7738348SEric.Yu@Sun.COM 
7740Sstevel@tonic-gate 		ASSERT(icmp->icmp_ipversion == IPV6_VERSION);
7758348SEric.Yu@Sun.COM 		ASSERT(icmp->icmp_family == AF_INET6);
7768348SEric.Yu@Sun.COM 
7778348SEric.Yu@Sun.COM 		icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len;
7788348SEric.Yu@Sun.COM 
7798348SEric.Yu@Sun.COM 		icmp->icmp_v6dst = *sin6;
7808348SEric.Yu@Sun.COM 		icmp->icmp_v6dst.sin6_port = 0;
7818348SEric.Yu@Sun.COM 
7820Sstevel@tonic-gate 		/*
7830Sstevel@tonic-gate 		 * Interpret a zero destination to mean loopback.
7840Sstevel@tonic-gate 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
7850Sstevel@tonic-gate 		 * generate the T_CONN_CON.
7860Sstevel@tonic-gate 		 */
7878348SEric.Yu@Sun.COM 		if (IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6dst.sin6_addr)) {
7888348SEric.Yu@Sun.COM 			icmp->icmp_v6dst.sin6_addr = ipv6_loopback;
7890Sstevel@tonic-gate 		}
7900Sstevel@tonic-gate 		/*
7910Sstevel@tonic-gate 		 * If the destination address is multicast and
7920Sstevel@tonic-gate 		 * an outgoing multicast interface has been set,
7930Sstevel@tonic-gate 		 * then the ip bind logic will pick the correct source
7940Sstevel@tonic-gate 		 * address (i.e. matching the outgoing multicast interface).
7950Sstevel@tonic-gate 		 */
7968348SEric.Yu@Sun.COM 		break;
7978348SEric.Yu@Sun.COM 	}
7988348SEric.Yu@Sun.COM 
7995240Snordmark 	icmp->icmp_pending_op = T_CONN_REQ;
8005240Snordmark 
8015240Snordmark 	if (icmp->icmp_state == TS_DATA_XFER) {
8025240Snordmark 		/* Already connected - clear out state */
8035240Snordmark 		icmp->icmp_v6src = icmp->icmp_bound_v6src;
8045240Snordmark 		icmp->icmp_state = TS_IDLE;
8055240Snordmark 	}
8065240Snordmark 
8070Sstevel@tonic-gate 	icmp->icmp_state = TS_DATA_XFER;
8085240Snordmark 	rw_exit(&icmp->icmp_rwlock);
8090Sstevel@tonic-gate 
8108348SEric.Yu@Sun.COM 	if (icmp->icmp_family == AF_INET6) {
8118348SEric.Yu@Sun.COM 		error = ip_proto_bind_connected_v6(connp, &ire_mp,
8128348SEric.Yu@Sun.COM 		    icmp->icmp_proto, &icmp->icmp_v6src, 0,
8138348SEric.Yu@Sun.COM 		    &icmp->icmp_v6dst.sin6_addr,
814*8778SErik.Nordmark@Sun.COM 		    NULL, sin6->sin6_port, B_TRUE, B_TRUE, cr);
8158348SEric.Yu@Sun.COM 	} else {
8168348SEric.Yu@Sun.COM 		error = ip_proto_bind_connected_v4(connp, &ire_mp,
8178348SEric.Yu@Sun.COM 		    icmp->icmp_proto, &V4_PART_OF_V6(icmp->icmp_v6src), 0,
8188348SEric.Yu@Sun.COM 		    V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr), sin->sin_port,
819*8778SErik.Nordmark@Sun.COM 		    B_TRUE, B_TRUE, cr);
8208348SEric.Yu@Sun.COM 	}
8218348SEric.Yu@Sun.COM 	rawip_post_ip_bind_connect(icmp, ire_mp, error);
8228348SEric.Yu@Sun.COM 	return (error);
8230Sstevel@tonic-gate }
8240Sstevel@tonic-gate 
8255240Snordmark static void
8265240Snordmark icmp_close_free(conn_t *connp)
8270Sstevel@tonic-gate {
8285240Snordmark 	icmp_t *icmp = connp->conn_icmp;
8290Sstevel@tonic-gate 
8300Sstevel@tonic-gate 	/* If there are any options associated with the stream, free them. */
8315315Snordmark 	if (icmp->icmp_ip_snd_options != NULL) {
8320Sstevel@tonic-gate 		mi_free((char *)icmp->icmp_ip_snd_options);
8335315Snordmark 		icmp->icmp_ip_snd_options = NULL;
8345330Snordmark 		icmp->icmp_ip_snd_options_len = 0;
8355315Snordmark 	}
8365315Snordmark 
8375315Snordmark 	if (icmp->icmp_filter != NULL) {
8380Sstevel@tonic-gate 		kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t));
8395315Snordmark 		icmp->icmp_filter = NULL;
8405315Snordmark 	}
8418348SEric.Yu@Sun.COM 
8420Sstevel@tonic-gate 	/* Free memory associated with sticky options */
8430Sstevel@tonic-gate 	if (icmp->icmp_sticky_hdrs_len != 0) {
8440Sstevel@tonic-gate 		kmem_free(icmp->icmp_sticky_hdrs,
8450Sstevel@tonic-gate 		    icmp->icmp_sticky_hdrs_len);
8460Sstevel@tonic-gate 		icmp->icmp_sticky_hdrs = NULL;
8470Sstevel@tonic-gate 		icmp->icmp_sticky_hdrs_len = 0;
8480Sstevel@tonic-gate 	}
8491676Sjpk 	ip6_pkt_free(&icmp->icmp_sticky_ipp);
8505330Snordmark 
8515330Snordmark 	/*
8525330Snordmark 	 * Clear any fields which the kmem_cache constructor clears.
8535330Snordmark 	 * Only icmp_connp needs to be preserved.
8545330Snordmark 	 * TBD: We should make this more efficient to avoid clearing
8555330Snordmark 	 * everything.
8565330Snordmark 	 */
8575330Snordmark 	ASSERT(icmp->icmp_connp == connp);
8585330Snordmark 	bzero(icmp, sizeof (icmp_t));
8595330Snordmark 	icmp->icmp_connp = connp;
8605240Snordmark }
8615240Snordmark 
8625240Snordmark static int
8638348SEric.Yu@Sun.COM rawip_do_close(conn_t *connp)
8645240Snordmark {
8655240Snordmark 	ASSERT(connp != NULL && IPCL_IS_RAWIP(connp));
8665240Snordmark 
8675240Snordmark 	ip_quiesce_conn(connp);
8685240Snordmark 
8698348SEric.Yu@Sun.COM 	if (!IPCL_IS_NONSTR(connp)) {
8708348SEric.Yu@Sun.COM 		qprocsoff(connp->conn_rq);
8718348SEric.Yu@Sun.COM 	}
8728348SEric.Yu@Sun.COM 
8738348SEric.Yu@Sun.COM 	ASSERT(connp->conn_icmp->icmp_fallback_queue_head == NULL &&
8748348SEric.Yu@Sun.COM 	    connp->conn_icmp->icmp_fallback_queue_tail == NULL);
8755240Snordmark 	icmp_close_free(connp);
8765240Snordmark 
8775240Snordmark 	/*
8785240Snordmark 	 * Now we are truly single threaded on this stream, and can
8795240Snordmark 	 * delete the things hanging off the connp, and finally the connp.
8805240Snordmark 	 * We removed this connp from the fanout list, it cannot be
8815240Snordmark 	 * accessed thru the fanouts, and we already waited for the
8825240Snordmark 	 * conn_ref to drop to 0. We are already in close, so
8835240Snordmark 	 * there cannot be any other thread from the top. qprocsoff
8845240Snordmark 	 * has completed, and service has completed or won't run in
8855240Snordmark 	 * future.
8865240Snordmark 	 */
8875240Snordmark 	ASSERT(connp->conn_ref == 1);
8885240Snordmark 
8898348SEric.Yu@Sun.COM 	if (!IPCL_IS_NONSTR(connp)) {
8908348SEric.Yu@Sun.COM 		inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
8918348SEric.Yu@Sun.COM 	} else {
8928477SRao.Shoaib@Sun.COM 		ip_free_helper_stream(connp);
8938348SEric.Yu@Sun.COM 	}
8945240Snordmark 
8955240Snordmark 	connp->conn_ref--;
8965240Snordmark 	ipcl_conn_destroy(connp);
8975240Snordmark 
8988348SEric.Yu@Sun.COM 	return (0);
8998348SEric.Yu@Sun.COM }
9008348SEric.Yu@Sun.COM 
9018348SEric.Yu@Sun.COM static int
9028348SEric.Yu@Sun.COM icmp_close(queue_t *q, int flags)
9038348SEric.Yu@Sun.COM {
9048348SEric.Yu@Sun.COM 	conn_t  *connp;
9058348SEric.Yu@Sun.COM 
9068348SEric.Yu@Sun.COM 	if (flags & SO_FALLBACK) {
9078348SEric.Yu@Sun.COM 		/*
9088348SEric.Yu@Sun.COM 		 * stream is being closed while in fallback
9098348SEric.Yu@Sun.COM 		 * simply free the resources that were allocated
9108348SEric.Yu@Sun.COM 		 */
9118348SEric.Yu@Sun.COM 		inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
9128348SEric.Yu@Sun.COM 		qprocsoff(q);
9138348SEric.Yu@Sun.COM 		goto done;
9148348SEric.Yu@Sun.COM 	}
9158348SEric.Yu@Sun.COM 
9168348SEric.Yu@Sun.COM 	connp = Q_TO_CONN(q);
9178348SEric.Yu@Sun.COM 	(void) rawip_do_close(connp);
9188348SEric.Yu@Sun.COM done:
9195240Snordmark 	q->q_ptr = WR(q)->q_ptr = NULL;
9205240Snordmark 	return (0);
9210Sstevel@tonic-gate }
9220Sstevel@tonic-gate 
9230Sstevel@tonic-gate /*
9240Sstevel@tonic-gate  * This routine handles each T_DISCON_REQ message passed to icmp
9250Sstevel@tonic-gate  * as an indicating that ICMP is no longer connected. This results
9260Sstevel@tonic-gate  * in sending a T_BIND_REQ to IP to restore the binding to just
9270Sstevel@tonic-gate  * the local address.
9280Sstevel@tonic-gate  *
9298348SEric.Yu@Sun.COM  * The disconnect completes in rawip_post_ip_bind_connect.
9300Sstevel@tonic-gate  */
9318348SEric.Yu@Sun.COM static int
9328348SEric.Yu@Sun.COM icmp_do_disconnect(conn_t *connp)
9330Sstevel@tonic-gate {
9340Sstevel@tonic-gate 	icmp_t	*icmp;
9358348SEric.Yu@Sun.COM 	mblk_t	*ire_mp;
9368348SEric.Yu@Sun.COM 	int error;
9375240Snordmark 
9385240Snordmark 	icmp = connp->conn_icmp;
9395240Snordmark 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
9405240Snordmark 	if (icmp->icmp_state != TS_DATA_XFER || icmp->icmp_pending_op != -1) {
9415240Snordmark 		rw_exit(&icmp->icmp_rwlock);
9428348SEric.Yu@Sun.COM 		return (-TOUTSTATE);
9430Sstevel@tonic-gate 	}
9445240Snordmark 	icmp->icmp_pending_op = T_DISCON_REQ;
9450Sstevel@tonic-gate 	icmp->icmp_v6src = icmp->icmp_bound_v6src;
9460Sstevel@tonic-gate 	icmp->icmp_state = TS_IDLE;
9470Sstevel@tonic-gate 
9480Sstevel@tonic-gate 
9490Sstevel@tonic-gate 	if (icmp->icmp_family == AF_INET6) {
9500Sstevel@tonic-gate 		/* Rebuild the header template */
9515240Snordmark 		error = icmp_build_hdrs(icmp);
9520Sstevel@tonic-gate 		if (error != 0) {
9535240Snordmark 			icmp->icmp_pending_op = -1;
9545240Snordmark 			rw_exit(&icmp->icmp_rwlock);
9558348SEric.Yu@Sun.COM 			return (error);
9560Sstevel@tonic-gate 		}
9570Sstevel@tonic-gate 	}
9585240Snordmark 
9595240Snordmark 	rw_exit(&icmp->icmp_rwlock);
9608348SEric.Yu@Sun.COM 	ire_mp = allocb(sizeof (ire_t), BPRI_HI);
9618348SEric.Yu@Sun.COM 	if (ire_mp == NULL) {
9628348SEric.Yu@Sun.COM 		return (ENOMEM);
9638348SEric.Yu@Sun.COM 	}
9648348SEric.Yu@Sun.COM 
9658348SEric.Yu@Sun.COM 	if (icmp->icmp_family == AF_INET6) {
9668348SEric.Yu@Sun.COM 		error = ip_proto_bind_laddr_v6(connp, &ire_mp, icmp->icmp_proto,
9678348SEric.Yu@Sun.COM 		    &icmp->icmp_bound_v6src, 0, B_TRUE);
9688348SEric.Yu@Sun.COM 	} else {
9698348SEric.Yu@Sun.COM 
9708348SEric.Yu@Sun.COM 		error = ip_proto_bind_laddr_v4(connp, &ire_mp, icmp->icmp_proto,
9718348SEric.Yu@Sun.COM 		    V4_PART_OF_V6(icmp->icmp_bound_v6src), 0, B_TRUE);
9728348SEric.Yu@Sun.COM 	}
9738348SEric.Yu@Sun.COM 
9748348SEric.Yu@Sun.COM 	rawip_post_ip_bind_connect(icmp, ire_mp, error);
9758348SEric.Yu@Sun.COM 
9768348SEric.Yu@Sun.COM 	return (error);
9778348SEric.Yu@Sun.COM }
9788348SEric.Yu@Sun.COM 
9798348SEric.Yu@Sun.COM static void
9808348SEric.Yu@Sun.COM icmp_tpi_disconnect(queue_t *q, mblk_t *mp)
9818348SEric.Yu@Sun.COM {
9828348SEric.Yu@Sun.COM 	conn_t	*connp = Q_TO_CONN(q);
9838348SEric.Yu@Sun.COM 	int	error;
9848348SEric.Yu@Sun.COM 
9858348SEric.Yu@Sun.COM 	/*
9868348SEric.Yu@Sun.COM 	 * Allocate the largest primitive we need to send back
9878348SEric.Yu@Sun.COM 	 * T_error_ack is > than T_ok_ack
9888348SEric.Yu@Sun.COM 	 */
9898348SEric.Yu@Sun.COM 	mp = reallocb(mp, sizeof (struct T_error_ack), 1);
9908348SEric.Yu@Sun.COM 	if (mp == NULL) {
9918348SEric.Yu@Sun.COM 		/* Unable to reuse the T_DISCON_REQ for the ack. */
9928348SEric.Yu@Sun.COM 		icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM);
9938348SEric.Yu@Sun.COM 		return;
9948348SEric.Yu@Sun.COM 	}
9958348SEric.Yu@Sun.COM 
9968348SEric.Yu@Sun.COM 	error = icmp_do_disconnect(connp);
9978348SEric.Yu@Sun.COM 
9988348SEric.Yu@Sun.COM 	if (error != 0) {
9998348SEric.Yu@Sun.COM 		if (error > 0) {
10008348SEric.Yu@Sun.COM 			icmp_err_ack(q, mp, 0, error);
10018348SEric.Yu@Sun.COM 		} else {
10028348SEric.Yu@Sun.COM 			icmp_err_ack(q, mp, -error, 0);
10038348SEric.Yu@Sun.COM 		}
10048348SEric.Yu@Sun.COM 	} else {
10058348SEric.Yu@Sun.COM 		mp = mi_tpi_ok_ack_alloc(mp);
10068348SEric.Yu@Sun.COM 		ASSERT(mp != NULL);
10078348SEric.Yu@Sun.COM 		qreply(q, mp);
10088348SEric.Yu@Sun.COM 	}
10098348SEric.Yu@Sun.COM 
10108348SEric.Yu@Sun.COM }
10118348SEric.Yu@Sun.COM 
10128348SEric.Yu@Sun.COM static int
10138348SEric.Yu@Sun.COM icmp_disconnect(conn_t *connp)
10148348SEric.Yu@Sun.COM {
10158348SEric.Yu@Sun.COM 	int	error;
10168348SEric.Yu@Sun.COM 	icmp_t	*icmp = connp->conn_icmp;
10178348SEric.Yu@Sun.COM 
10188348SEric.Yu@Sun.COM 	icmp->icmp_dgram_errind = B_FALSE;
10198348SEric.Yu@Sun.COM 
10208348SEric.Yu@Sun.COM 	error = icmp_do_disconnect(connp);
10218348SEric.Yu@Sun.COM 
10228348SEric.Yu@Sun.COM 	if (error < 0)
10238348SEric.Yu@Sun.COM 		error = proto_tlitosyserr(-error);
10248348SEric.Yu@Sun.COM 	return (error);
10250Sstevel@tonic-gate }
10260Sstevel@tonic-gate 
10270Sstevel@tonic-gate /* This routine creates a T_ERROR_ACK message and passes it upstream. */
10280Sstevel@tonic-gate static void
10290Sstevel@tonic-gate icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
10300Sstevel@tonic-gate {
10310Sstevel@tonic-gate 	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
10320Sstevel@tonic-gate 		qreply(q, mp);
10330Sstevel@tonic-gate }
10340Sstevel@tonic-gate 
10350Sstevel@tonic-gate /* Shorthand to generate and send TPI error acks to our client */
10360Sstevel@tonic-gate static void
10370Sstevel@tonic-gate icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
10380Sstevel@tonic-gate     t_scalar_t t_error, int sys_error)
10390Sstevel@tonic-gate {
10400Sstevel@tonic-gate 	struct T_error_ack	*teackp;
10410Sstevel@tonic-gate 
10420Sstevel@tonic-gate 	if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
10430Sstevel@tonic-gate 	    M_PCPROTO, T_ERROR_ACK)) != NULL) {
10440Sstevel@tonic-gate 		teackp = (struct T_error_ack *)mp->b_rptr;
10450Sstevel@tonic-gate 		teackp->ERROR_prim = primitive;
10460Sstevel@tonic-gate 		teackp->TLI_error = t_error;
10470Sstevel@tonic-gate 		teackp->UNIX_error = sys_error;
10480Sstevel@tonic-gate 		qreply(q, mp);
10490Sstevel@tonic-gate 	}
10500Sstevel@tonic-gate }
10510Sstevel@tonic-gate 
10520Sstevel@tonic-gate /*
10535240Snordmark  * icmp_icmp_error is called by icmp_input to process ICMP
10540Sstevel@tonic-gate  * messages passed up by IP.
10558348SEric.Yu@Sun.COM  * Generates the appropriate permanent (non-transient) errors.
10560Sstevel@tonic-gate  * Assumes that IP has pulled up everything up to and including
10570Sstevel@tonic-gate  * the ICMP header.
10580Sstevel@tonic-gate  */
10590Sstevel@tonic-gate static void
10608348SEric.Yu@Sun.COM icmp_icmp_error(conn_t *connp, mblk_t *mp)
10610Sstevel@tonic-gate {
10620Sstevel@tonic-gate 	icmph_t *icmph;
10630Sstevel@tonic-gate 	ipha_t	*ipha;
10640Sstevel@tonic-gate 	int	iph_hdr_length;
10650Sstevel@tonic-gate 	sin_t	sin;
10660Sstevel@tonic-gate 	mblk_t	*mp1;
10670Sstevel@tonic-gate 	int	error = 0;
10688348SEric.Yu@Sun.COM 	icmp_t	*icmp = connp->conn_icmp;
10690Sstevel@tonic-gate 
10700Sstevel@tonic-gate 	ipha = (ipha_t *)mp->b_rptr;
10710Sstevel@tonic-gate 
10725240Snordmark 	ASSERT(OK_32PTR(mp->b_rptr));
10735240Snordmark 
10740Sstevel@tonic-gate 	if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
10750Sstevel@tonic-gate 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
10768348SEric.Yu@Sun.COM 		icmp_icmp_error_ipv6(connp, mp);
10770Sstevel@tonic-gate 		return;
10780Sstevel@tonic-gate 	}
10798348SEric.Yu@Sun.COM 
10808348SEric.Yu@Sun.COM 	/*
10818348SEric.Yu@Sun.COM 	 * icmp does not support v4 mapped addresses
10828348SEric.Yu@Sun.COM 	 * so we can never be here for a V6 socket
10838348SEric.Yu@Sun.COM 	 * i.e. icmp_family == AF_INET6
10848348SEric.Yu@Sun.COM 	 */
10858348SEric.Yu@Sun.COM 	ASSERT((IPH_HDR_VERSION(ipha) == IPV4_VERSION) &&
10868348SEric.Yu@Sun.COM 	    (icmp->icmp_family == AF_INET));
10878348SEric.Yu@Sun.COM 
10888348SEric.Yu@Sun.COM 	ASSERT(icmp->icmp_family == AF_INET);
10890Sstevel@tonic-gate 
10905240Snordmark 	/* Skip past the outer IP and ICMP headers */
10910Sstevel@tonic-gate 	iph_hdr_length = IPH_HDR_LENGTH(ipha);
10920Sstevel@tonic-gate 	icmph = (icmph_t *)(&mp->b_rptr[iph_hdr_length]);
10930Sstevel@tonic-gate 	ipha = (ipha_t *)&icmph[1];
10940Sstevel@tonic-gate 	iph_hdr_length = IPH_HDR_LENGTH(ipha);
10950Sstevel@tonic-gate 
10960Sstevel@tonic-gate 	switch (icmph->icmph_type) {
10970Sstevel@tonic-gate 	case ICMP_DEST_UNREACHABLE:
10980Sstevel@tonic-gate 		switch (icmph->icmph_code) {
10990Sstevel@tonic-gate 		case ICMP_FRAGMENTATION_NEEDED:
11000Sstevel@tonic-gate 			/*
11010Sstevel@tonic-gate 			 * IP has already adjusted the path MTU.
11020Sstevel@tonic-gate 			 */
11030Sstevel@tonic-gate 			break;
11040Sstevel@tonic-gate 		case ICMP_PORT_UNREACHABLE:
11050Sstevel@tonic-gate 		case ICMP_PROTOCOL_UNREACHABLE:
11060Sstevel@tonic-gate 			error = ECONNREFUSED;
11070Sstevel@tonic-gate 			break;
11080Sstevel@tonic-gate 		default:
11090Sstevel@tonic-gate 			/* Transient errors */
11100Sstevel@tonic-gate 			break;
11110Sstevel@tonic-gate 		}
11120Sstevel@tonic-gate 		break;
11130Sstevel@tonic-gate 	default:
11140Sstevel@tonic-gate 		/* Transient errors */
11150Sstevel@tonic-gate 		break;
11160Sstevel@tonic-gate 	}
11170Sstevel@tonic-gate 	if (error == 0) {
11180Sstevel@tonic-gate 		freemsg(mp);
11190Sstevel@tonic-gate 		return;
11200Sstevel@tonic-gate 	}
11210Sstevel@tonic-gate 
11225240Snordmark 	/*
11235240Snordmark 	 * Deliver T_UDERROR_IND when the application has asked for it.
11245240Snordmark 	 * The socket layer enables this automatically when connected.
11255240Snordmark 	 */
11265240Snordmark 	if (!icmp->icmp_dgram_errind) {
11275240Snordmark 		freemsg(mp);
11285240Snordmark 		return;
11295240Snordmark 	}
11305240Snordmark 
11318348SEric.Yu@Sun.COM 	sin = sin_null;
11328348SEric.Yu@Sun.COM 	sin.sin_family = AF_INET;
11338348SEric.Yu@Sun.COM 	sin.sin_addr.s_addr = ipha->ipha_dst;
11348348SEric.Yu@Sun.COM 	if (IPCL_IS_NONSTR(connp)) {
11358348SEric.Yu@Sun.COM 		rw_enter(&icmp->icmp_rwlock, RW_WRITER);
11368348SEric.Yu@Sun.COM 		if (icmp->icmp_state == TS_DATA_XFER) {
11378348SEric.Yu@Sun.COM 			if (sin.sin_addr.s_addr ==
11388348SEric.Yu@Sun.COM 			    V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr)) {
11398348SEric.Yu@Sun.COM 				rw_exit(&icmp->icmp_rwlock);
11408348SEric.Yu@Sun.COM 				(*connp->conn_upcalls->su_set_error)
11418348SEric.Yu@Sun.COM 				    (connp->conn_upper_handle, error);
11428348SEric.Yu@Sun.COM 				goto done;
11438348SEric.Yu@Sun.COM 			}
11448348SEric.Yu@Sun.COM 		} else {
11458348SEric.Yu@Sun.COM 			icmp->icmp_delayed_error = error;
11468348SEric.Yu@Sun.COM 			*((sin_t *)&icmp->icmp_delayed_addr) = sin;
11478348SEric.Yu@Sun.COM 		}
11488348SEric.Yu@Sun.COM 		rw_exit(&icmp->icmp_rwlock);
11498348SEric.Yu@Sun.COM 	} else {
11508348SEric.Yu@Sun.COM 
11518348SEric.Yu@Sun.COM 		mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL,
11528348SEric.Yu@Sun.COM 		    0, error);
11538348SEric.Yu@Sun.COM 		if (mp1 != NULL)
11548348SEric.Yu@Sun.COM 			putnext(connp->conn_rq, mp1);
11558348SEric.Yu@Sun.COM 	}
11568348SEric.Yu@Sun.COM done:
11570Sstevel@tonic-gate 	freemsg(mp);
11580Sstevel@tonic-gate }
11590Sstevel@tonic-gate 
11600Sstevel@tonic-gate /*
11610Sstevel@tonic-gate  * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMPv6
11620Sstevel@tonic-gate  * for IPv6 packets.
11630Sstevel@tonic-gate  * Send permanent (non-transient) errors upstream.
11640Sstevel@tonic-gate  * Assumes that IP has pulled up all the extension headers as well
11650Sstevel@tonic-gate  * as the ICMPv6 header.
11660Sstevel@tonic-gate  */
11670Sstevel@tonic-gate static void
11688348SEric.Yu@Sun.COM icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp)
11690Sstevel@tonic-gate {
11700Sstevel@tonic-gate 	icmp6_t		*icmp6;
11710Sstevel@tonic-gate 	ip6_t		*ip6h, *outer_ip6h;
11720Sstevel@tonic-gate 	uint16_t	iph_hdr_length;
11730Sstevel@tonic-gate 	uint8_t		*nexthdrp;
11740Sstevel@tonic-gate 	sin6_t		sin6;
11750Sstevel@tonic-gate 	mblk_t		*mp1;
11760Sstevel@tonic-gate 	int		error = 0;
11778348SEric.Yu@Sun.COM 	icmp_t		*icmp = connp->conn_icmp;
11780Sstevel@tonic-gate 
11790Sstevel@tonic-gate 	outer_ip6h = (ip6_t *)mp->b_rptr;
11800Sstevel@tonic-gate 	if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
11810Sstevel@tonic-gate 		iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
11820Sstevel@tonic-gate 	else
11830Sstevel@tonic-gate 		iph_hdr_length = IPV6_HDR_LEN;
11840Sstevel@tonic-gate 
11850Sstevel@tonic-gate 	icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
11860Sstevel@tonic-gate 	ip6h = (ip6_t *)&icmp6[1];
11870Sstevel@tonic-gate 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
11880Sstevel@tonic-gate 		freemsg(mp);
11890Sstevel@tonic-gate 		return;
11900Sstevel@tonic-gate 	}
11915240Snordmark 
11920Sstevel@tonic-gate 	switch (icmp6->icmp6_type) {
11930Sstevel@tonic-gate 	case ICMP6_DST_UNREACH:
11940Sstevel@tonic-gate 		switch (icmp6->icmp6_code) {
11950Sstevel@tonic-gate 		case ICMP6_DST_UNREACH_NOPORT:
11960Sstevel@tonic-gate 			error = ECONNREFUSED;
11970Sstevel@tonic-gate 			break;
11980Sstevel@tonic-gate 		case ICMP6_DST_UNREACH_ADMIN:
11990Sstevel@tonic-gate 		case ICMP6_DST_UNREACH_NOROUTE:
12000Sstevel@tonic-gate 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
12010Sstevel@tonic-gate 		case ICMP6_DST_UNREACH_ADDR:
12020Sstevel@tonic-gate 			/* Transient errors */
12030Sstevel@tonic-gate 			break;
12040Sstevel@tonic-gate 		default:
12050Sstevel@tonic-gate 			break;
12060Sstevel@tonic-gate 		}
12070Sstevel@tonic-gate 		break;
12080Sstevel@tonic-gate 	case ICMP6_PACKET_TOO_BIG: {
12090Sstevel@tonic-gate 		struct T_unitdata_ind	*tudi;
12100Sstevel@tonic-gate 		struct T_opthdr		*toh;
12110Sstevel@tonic-gate 		size_t			udi_size;
12120Sstevel@tonic-gate 		mblk_t			*newmp;
12130Sstevel@tonic-gate 		t_scalar_t		opt_length = sizeof (struct T_opthdr) +
12140Sstevel@tonic-gate 		    sizeof (struct ip6_mtuinfo);
12150Sstevel@tonic-gate 		sin6_t			*sin6;
12160Sstevel@tonic-gate 		struct ip6_mtuinfo	*mtuinfo;
12170Sstevel@tonic-gate 
12180Sstevel@tonic-gate 		/*
12190Sstevel@tonic-gate 		 * If the application has requested to receive path mtu
12200Sstevel@tonic-gate 		 * information, send up an empty message containing an
12210Sstevel@tonic-gate 		 * IPV6_PATHMTU ancillary data item.
12220Sstevel@tonic-gate 		 */
12230Sstevel@tonic-gate 		if (!icmp->icmp_ipv6_recvpathmtu)
12240Sstevel@tonic-gate 			break;
12250Sstevel@tonic-gate 
12260Sstevel@tonic-gate 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
12270Sstevel@tonic-gate 		    opt_length;
12280Sstevel@tonic-gate 		if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
12295240Snordmark 			BUMP_MIB(&icmp->icmp_is->is_rawip_mib, rawipInErrors);
12300Sstevel@tonic-gate 			break;
12310Sstevel@tonic-gate 		}
12320Sstevel@tonic-gate 
12330Sstevel@tonic-gate 		/*
12340Sstevel@tonic-gate 		 * newmp->b_cont is left to NULL on purpose.  This is an
12350Sstevel@tonic-gate 		 * empty message containing only ancillary data.
12360Sstevel@tonic-gate 		 */
12370Sstevel@tonic-gate 		newmp->b_datap->db_type = M_PROTO;
12380Sstevel@tonic-gate 		tudi = (struct T_unitdata_ind *)newmp->b_rptr;
12390Sstevel@tonic-gate 		newmp->b_wptr = (uchar_t *)tudi + udi_size;
12400Sstevel@tonic-gate 		tudi->PRIM_type = T_UNITDATA_IND;
12410Sstevel@tonic-gate 		tudi->SRC_length = sizeof (sin6_t);
12420Sstevel@tonic-gate 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
12430Sstevel@tonic-gate 		tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
12440Sstevel@tonic-gate 		tudi->OPT_length = opt_length;
12450Sstevel@tonic-gate 
12460Sstevel@tonic-gate 		sin6 = (sin6_t *)&tudi[1];
12470Sstevel@tonic-gate 		bzero(sin6, sizeof (sin6_t));
12480Sstevel@tonic-gate 		sin6->sin6_family = AF_INET6;
12498348SEric.Yu@Sun.COM 		sin6->sin6_addr = icmp->icmp_v6dst.sin6_addr;
12500Sstevel@tonic-gate 
12510Sstevel@tonic-gate 		toh = (struct T_opthdr *)&sin6[1];
12520Sstevel@tonic-gate 		toh->level = IPPROTO_IPV6;
12530Sstevel@tonic-gate 		toh->name = IPV6_PATHMTU;
12540Sstevel@tonic-gate 		toh->len = opt_length;
12550Sstevel@tonic-gate 		toh->status = 0;
12560Sstevel@tonic-gate 
12570Sstevel@tonic-gate 		mtuinfo = (struct ip6_mtuinfo *)&toh[1];
12580Sstevel@tonic-gate 		bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
12590Sstevel@tonic-gate 		mtuinfo->ip6m_addr.sin6_family = AF_INET6;
12600Sstevel@tonic-gate 		mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
12610Sstevel@tonic-gate 		mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
12620Sstevel@tonic-gate 		/*
12630Sstevel@tonic-gate 		 * We've consumed everything we need from the original
12640Sstevel@tonic-gate 		 * message.  Free it, then send our empty message.
12650Sstevel@tonic-gate 		 */
12660Sstevel@tonic-gate 		freemsg(mp);
12678348SEric.Yu@Sun.COM 		if (!IPCL_IS_NONSTR(connp)) {
12688348SEric.Yu@Sun.COM 			putnext(connp->conn_rq, newmp);
12698348SEric.Yu@Sun.COM 		} else {
12708348SEric.Yu@Sun.COM 			(*connp->conn_upcalls->su_recv)
12718348SEric.Yu@Sun.COM 			    (connp->conn_upper_handle, newmp, 0, 0, &error,
12728348SEric.Yu@Sun.COM 			    NULL);
12738348SEric.Yu@Sun.COM 			ASSERT(error == 0);
12748348SEric.Yu@Sun.COM 		}
12750Sstevel@tonic-gate 		return;
12760Sstevel@tonic-gate 	}
12770Sstevel@tonic-gate 	case ICMP6_TIME_EXCEEDED:
12780Sstevel@tonic-gate 		/* Transient errors */
12790Sstevel@tonic-gate 		break;
12800Sstevel@tonic-gate 	case ICMP6_PARAM_PROB:
12810Sstevel@tonic-gate 		/* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
12820Sstevel@tonic-gate 		if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
12830Sstevel@tonic-gate 		    (uchar_t *)ip6h + icmp6->icmp6_pptr ==
12840Sstevel@tonic-gate 		    (uchar_t *)nexthdrp) {
12850Sstevel@tonic-gate 			error = ECONNREFUSED;
12860Sstevel@tonic-gate 			break;
12870Sstevel@tonic-gate 		}
12880Sstevel@tonic-gate 		break;
12890Sstevel@tonic-gate 	}
12900Sstevel@tonic-gate 	if (error == 0) {
12910Sstevel@tonic-gate 		freemsg(mp);
12920Sstevel@tonic-gate 		return;
12930Sstevel@tonic-gate 	}
12940Sstevel@tonic-gate 
12955240Snordmark 	/*
12965240Snordmark 	 * Deliver T_UDERROR_IND when the application has asked for it.
12975240Snordmark 	 * The socket layer enables this automatically when connected.
12985240Snordmark 	 */
12995240Snordmark 	if (!icmp->icmp_dgram_errind) {
13005240Snordmark 		freemsg(mp);
13015240Snordmark 		return;
13025240Snordmark 	}
13035240Snordmark 
13040Sstevel@tonic-gate 	sin6 = sin6_null;
13050Sstevel@tonic-gate 	sin6.sin6_family = AF_INET6;
13060Sstevel@tonic-gate 	sin6.sin6_addr = ip6h->ip6_dst;
13070Sstevel@tonic-gate 	sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
13080Sstevel@tonic-gate 
13098348SEric.Yu@Sun.COM 	if (IPCL_IS_NONSTR(connp)) {
13108348SEric.Yu@Sun.COM 		rw_enter(&icmp->icmp_rwlock, RW_WRITER);
13118348SEric.Yu@Sun.COM 		if (icmp->icmp_state == TS_DATA_XFER) {
13128348SEric.Yu@Sun.COM 			if (IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
13138348SEric.Yu@Sun.COM 			    &icmp->icmp_v6dst.sin6_addr)) {
13148348SEric.Yu@Sun.COM 				rw_exit(&icmp->icmp_rwlock);
13158348SEric.Yu@Sun.COM 				(*connp->conn_upcalls->su_set_error)
13168348SEric.Yu@Sun.COM 				    (connp->conn_upper_handle, error);
13178348SEric.Yu@Sun.COM 				goto done;
13188348SEric.Yu@Sun.COM 			}
13198348SEric.Yu@Sun.COM 		} else {
13208348SEric.Yu@Sun.COM 			icmp->icmp_delayed_error = error;
13218348SEric.Yu@Sun.COM 			*((sin6_t *)&icmp->icmp_delayed_addr) = sin6;
13228348SEric.Yu@Sun.COM 		}
13238348SEric.Yu@Sun.COM 		rw_exit(&icmp->icmp_rwlock);
13248348SEric.Yu@Sun.COM 	} else {
13258348SEric.Yu@Sun.COM 
13268348SEric.Yu@Sun.COM 		mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
13278348SEric.Yu@Sun.COM 		    NULL, 0, error);
13288348SEric.Yu@Sun.COM 		if (mp1 != NULL)
13298348SEric.Yu@Sun.COM 			putnext(connp->conn_rq, mp1);
13308348SEric.Yu@Sun.COM 	}
13318348SEric.Yu@Sun.COM done:
13320Sstevel@tonic-gate 	freemsg(mp);
13330Sstevel@tonic-gate }
13340Sstevel@tonic-gate 
13350Sstevel@tonic-gate /*
13360Sstevel@tonic-gate  * This routine responds to T_ADDR_REQ messages.  It is called by icmp_wput.
13370Sstevel@tonic-gate  * The local address is filled in if endpoint is bound. The remote address
13380Sstevel@tonic-gate  * is filled in if remote address has been precified ("connected endpoint")
13390Sstevel@tonic-gate  * (The concept of connected CLTS sockets is alien to published TPI
13400Sstevel@tonic-gate  *  but we support it anyway).
13410Sstevel@tonic-gate  */
13420Sstevel@tonic-gate static void
13430Sstevel@tonic-gate icmp_addr_req(queue_t *q, mblk_t *mp)
13440Sstevel@tonic-gate {
13455240Snordmark 	icmp_t	*icmp = Q_TO_ICMP(q);
13460Sstevel@tonic-gate 	mblk_t	*ackmp;
13470Sstevel@tonic-gate 	struct T_addr_ack *taa;
13480Sstevel@tonic-gate 
13490Sstevel@tonic-gate 	/* Make it large enough for worst case */
13500Sstevel@tonic-gate 	ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
13510Sstevel@tonic-gate 	    2 * sizeof (sin6_t), 1);
13520Sstevel@tonic-gate 	if (ackmp == NULL) {
13530Sstevel@tonic-gate 		icmp_err_ack(q, mp, TSYSERR, ENOMEM);
13540Sstevel@tonic-gate 		return;
13550Sstevel@tonic-gate 	}
13560Sstevel@tonic-gate 	taa = (struct T_addr_ack *)ackmp->b_rptr;
13570Sstevel@tonic-gate 
13580Sstevel@tonic-gate 	bzero(taa, sizeof (struct T_addr_ack));
13590Sstevel@tonic-gate 	ackmp->b_wptr = (uchar_t *)&taa[1];
13600Sstevel@tonic-gate 
13610Sstevel@tonic-gate 	taa->PRIM_type = T_ADDR_ACK;
13620Sstevel@tonic-gate 	ackmp->b_datap->db_type = M_PCPROTO;
13635240Snordmark 	rw_enter(&icmp->icmp_rwlock, RW_READER);
13640Sstevel@tonic-gate 	/*
13650Sstevel@tonic-gate 	 * Note: Following code assumes 32 bit alignment of basic
13660Sstevel@tonic-gate 	 * data structures like sin_t and struct T_addr_ack.
13670Sstevel@tonic-gate 	 */
13680Sstevel@tonic-gate 	if (icmp->icmp_state != TS_UNBND) {
13690Sstevel@tonic-gate 		/*
13700Sstevel@tonic-gate 		 * Fill in local address
13710Sstevel@tonic-gate 		 */
13720Sstevel@tonic-gate 		taa->LOCADDR_offset = sizeof (*taa);
13730Sstevel@tonic-gate 		if (icmp->icmp_family == AF_INET) {
13740Sstevel@tonic-gate 			sin_t	*sin;
13750Sstevel@tonic-gate 
13760Sstevel@tonic-gate 			taa->LOCADDR_length = sizeof (sin_t);
13770Sstevel@tonic-gate 			sin = (sin_t *)&taa[1];
13780Sstevel@tonic-gate 			/* Fill zeroes and then intialize non-zero fields */
13790Sstevel@tonic-gate 			*sin = sin_null;
13800Sstevel@tonic-gate 			sin->sin_family = AF_INET;
13810Sstevel@tonic-gate 			if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) &&
13820Sstevel@tonic-gate 			    !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) {
13830Sstevel@tonic-gate 				IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src,
13840Sstevel@tonic-gate 				    sin->sin_addr.s_addr);
13850Sstevel@tonic-gate 			} else {
13860Sstevel@tonic-gate 				/*
13870Sstevel@tonic-gate 				 * INADDR_ANY
13880Sstevel@tonic-gate 				 * icmp_v6src is not set, we might be bound to
13890Sstevel@tonic-gate 				 * broadcast/multicast. Use icmp_bound_v6src as
13900Sstevel@tonic-gate 				 * local address instead (that could
13910Sstevel@tonic-gate 				 * also still be INADDR_ANY)
13920Sstevel@tonic-gate 				 */
13930Sstevel@tonic-gate 				IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_bound_v6src,
13940Sstevel@tonic-gate 				    sin->sin_addr.s_addr);
13950Sstevel@tonic-gate 			}
13960Sstevel@tonic-gate 			ackmp->b_wptr = (uchar_t *)&sin[1];
13970Sstevel@tonic-gate 		} else {
13980Sstevel@tonic-gate 			sin6_t	*sin6;
13990Sstevel@tonic-gate 
14000Sstevel@tonic-gate 			ASSERT(icmp->icmp_family == AF_INET6);
14010Sstevel@tonic-gate 			taa->LOCADDR_length = sizeof (sin6_t);
14020Sstevel@tonic-gate 			sin6 = (sin6_t *)&taa[1];
14030Sstevel@tonic-gate 			/* Fill zeroes and then intialize non-zero fields */
14040Sstevel@tonic-gate 			*sin6 = sin6_null;
14050Sstevel@tonic-gate 			sin6->sin6_family = AF_INET6;
14060Sstevel@tonic-gate 			if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) {
14070Sstevel@tonic-gate 				sin6->sin6_addr = icmp->icmp_v6src;
14080Sstevel@tonic-gate 			} else {
14090Sstevel@tonic-gate 				/*
14100Sstevel@tonic-gate 				 * UNSPECIFIED
14110Sstevel@tonic-gate 				 * icmp_v6src is not set, we might be bound to
14120Sstevel@tonic-gate 				 * broadcast/multicast. Use icmp_bound_v6src as
14130Sstevel@tonic-gate 				 * local address instead (that could
14140Sstevel@tonic-gate 				 * also still be UNSPECIFIED)
14150Sstevel@tonic-gate 				 */
14160Sstevel@tonic-gate 				sin6->sin6_addr = icmp->icmp_bound_v6src;
14170Sstevel@tonic-gate 			}
14180Sstevel@tonic-gate 			ackmp->b_wptr = (uchar_t *)&sin6[1];
14190Sstevel@tonic-gate 		}
14200Sstevel@tonic-gate 	}
14215240Snordmark 	rw_exit(&icmp->icmp_rwlock);
14220Sstevel@tonic-gate 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
14230Sstevel@tonic-gate 	qreply(q, ackmp);
14240Sstevel@tonic-gate }
14250Sstevel@tonic-gate 
14260Sstevel@tonic-gate static void
14270Sstevel@tonic-gate icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp)
14280Sstevel@tonic-gate {
14290Sstevel@tonic-gate 	*tap = icmp_g_t_info_ack;
14300Sstevel@tonic-gate 
14310Sstevel@tonic-gate 	if (icmp->icmp_family == AF_INET6)
14320Sstevel@tonic-gate 		tap->ADDR_size = sizeof (sin6_t);
14330Sstevel@tonic-gate 	else
14340Sstevel@tonic-gate 		tap->ADDR_size = sizeof (sin_t);
14350Sstevel@tonic-gate 	tap->CURRENT_state = icmp->icmp_state;
14360Sstevel@tonic-gate 	tap->OPT_size = icmp_max_optsize;
14370Sstevel@tonic-gate }
14380Sstevel@tonic-gate 
14398348SEric.Yu@Sun.COM static void
14408348SEric.Yu@Sun.COM icmp_do_capability_ack(icmp_t *icmp, struct T_capability_ack *tcap,
14418348SEric.Yu@Sun.COM     t_uscalar_t cap_bits1)
14428348SEric.Yu@Sun.COM {
14438348SEric.Yu@Sun.COM 	tcap->CAP_bits1 = 0;
14448348SEric.Yu@Sun.COM 
14458348SEric.Yu@Sun.COM 	if (cap_bits1 & TC1_INFO) {
14468348SEric.Yu@Sun.COM 		icmp_copy_info(&tcap->INFO_ack, icmp);
14478348SEric.Yu@Sun.COM 		tcap->CAP_bits1 |= TC1_INFO;
14488348SEric.Yu@Sun.COM 	}
14498348SEric.Yu@Sun.COM }
14508348SEric.Yu@Sun.COM 
14510Sstevel@tonic-gate /*
14520Sstevel@tonic-gate  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
14530Sstevel@tonic-gate  * icmp_wput.  Much of the T_CAPABILITY_ACK information is copied from
14540Sstevel@tonic-gate  * icmp_g_t_info_ack.  The current state of the stream is copied from
14550Sstevel@tonic-gate  * icmp_state.
14560Sstevel@tonic-gate  */
14570Sstevel@tonic-gate static void
14580Sstevel@tonic-gate icmp_capability_req(queue_t *q, mblk_t *mp)
14590Sstevel@tonic-gate {
14605240Snordmark 	icmp_t			*icmp = Q_TO_ICMP(q);
14610Sstevel@tonic-gate 	t_uscalar_t		cap_bits1;
14620Sstevel@tonic-gate 	struct T_capability_ack	*tcap;
14630Sstevel@tonic-gate 
14640Sstevel@tonic-gate 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
14650Sstevel@tonic-gate 
14660Sstevel@tonic-gate 	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
14675240Snordmark 	    mp->b_datap->db_type, T_CAPABILITY_ACK);
14680Sstevel@tonic-gate 	if (!mp)
14690Sstevel@tonic-gate 		return;
14700Sstevel@tonic-gate 
14710Sstevel@tonic-gate 	tcap = (struct T_capability_ack *)mp->b_rptr;
14728348SEric.Yu@Sun.COM 
14738348SEric.Yu@Sun.COM 	icmp_do_capability_ack(icmp, tcap, cap_bits1);
14740Sstevel@tonic-gate 
14750Sstevel@tonic-gate 	qreply(q, mp);
14760Sstevel@tonic-gate }
14770Sstevel@tonic-gate 
14780Sstevel@tonic-gate /*
14790Sstevel@tonic-gate  * This routine responds to T_INFO_REQ messages.  It is called by icmp_wput.
14800Sstevel@tonic-gate  * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack.
14810Sstevel@tonic-gate  * The current state of the stream is copied from icmp_state.
14820Sstevel@tonic-gate  */
14830Sstevel@tonic-gate static void
14840Sstevel@tonic-gate icmp_info_req(queue_t *q, mblk_t *mp)
14850Sstevel@tonic-gate {
14865240Snordmark 	icmp_t	*icmp = Q_TO_ICMP(q);
14870Sstevel@tonic-gate 
14880Sstevel@tonic-gate 	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
14890Sstevel@tonic-gate 	    T_INFO_ACK);
14900Sstevel@tonic-gate 	if (!mp)
14910Sstevel@tonic-gate 		return;
14920Sstevel@tonic-gate 	icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp);
14930Sstevel@tonic-gate 	qreply(q, mp);
14940Sstevel@tonic-gate }
14950Sstevel@tonic-gate 
14965240Snordmark /* For /dev/icmp aka AF_INET open */
14975240Snordmark static int
14988348SEric.Yu@Sun.COM icmp_tpi_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
14998348SEric.Yu@Sun.COM     int family)
15005240Snordmark {
15015240Snordmark 	conn_t *connp;
15025240Snordmark 	dev_t	conn_dev;
15033448Sdh155122 	icmp_stack_t *is;
15048348SEric.Yu@Sun.COM 	int	error;
15058348SEric.Yu@Sun.COM 
15068348SEric.Yu@Sun.COM 	conn_dev = NULL;
15070Sstevel@tonic-gate 
15080Sstevel@tonic-gate 	/* If the stream is already open, return immediately. */
15090Sstevel@tonic-gate 	if (q->q_ptr != NULL)
15100Sstevel@tonic-gate 		return (0);
15110Sstevel@tonic-gate 
15125240Snordmark 	if (sflag == MODOPEN)
15130Sstevel@tonic-gate 		return (EINVAL);
15140Sstevel@tonic-gate 
15158348SEric.Yu@Sun.COM 	/*
15168348SEric.Yu@Sun.COM 	 * Since ICMP is not used so heavily, allocating from the small
15178348SEric.Yu@Sun.COM 	 * arena should be sufficient.
15188348SEric.Yu@Sun.COM 	 */
15198348SEric.Yu@Sun.COM 	if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) {
15208348SEric.Yu@Sun.COM 		return (EBUSY);
15218348SEric.Yu@Sun.COM 	}
15228348SEric.Yu@Sun.COM 
15238348SEric.Yu@Sun.COM 	if (flag & SO_FALLBACK) {
15248348SEric.Yu@Sun.COM 		/*
15258348SEric.Yu@Sun.COM 		 * Non streams socket needs a stream to fallback to
15268348SEric.Yu@Sun.COM 		 */
15278348SEric.Yu@Sun.COM 		RD(q)->q_ptr = (void *)conn_dev;
15288348SEric.Yu@Sun.COM 		WR(q)->q_qinfo = &icmp_fallback_sock_winit;
15298348SEric.Yu@Sun.COM 		WR(q)->q_ptr = (void *)ip_minor_arena_sa;
15308348SEric.Yu@Sun.COM 		qprocson(q);
15318348SEric.Yu@Sun.COM 		return (0);
15328348SEric.Yu@Sun.COM 	}
15338348SEric.Yu@Sun.COM 
15348348SEric.Yu@Sun.COM 	connp = icmp_open(family, credp, &error, KM_SLEEP);
15358348SEric.Yu@Sun.COM 	if (connp == NULL) {
15368348SEric.Yu@Sun.COM 		ASSERT(error != NULL);
15378348SEric.Yu@Sun.COM 		inet_minor_free(ip_minor_arena_sa, connp->conn_dev);
15388348SEric.Yu@Sun.COM 		return (error);
15398348SEric.Yu@Sun.COM 	}
15408348SEric.Yu@Sun.COM 
15418348SEric.Yu@Sun.COM 	*devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
15428348SEric.Yu@Sun.COM 	connp->conn_dev = conn_dev;
15438348SEric.Yu@Sun.COM 	connp->conn_minor_arena = ip_minor_arena_sa;
15448348SEric.Yu@Sun.COM 
15458348SEric.Yu@Sun.COM 	is = connp->conn_icmp->icmp_is;
15468348SEric.Yu@Sun.COM 
15478348SEric.Yu@Sun.COM 	/*
15488348SEric.Yu@Sun.COM 	 * Initialize the icmp_t structure for this stream.
15498348SEric.Yu@Sun.COM 	 */
15508348SEric.Yu@Sun.COM 	q->q_ptr = connp;
15518348SEric.Yu@Sun.COM 	WR(q)->q_ptr = connp;
15528348SEric.Yu@Sun.COM 	connp->conn_rq = q;
15538348SEric.Yu@Sun.COM 	connp->conn_wq = WR(q);
15548348SEric.Yu@Sun.COM 
15558348SEric.Yu@Sun.COM 	if (connp->conn_icmp->icmp_family == AF_INET6) {
15568348SEric.Yu@Sun.COM 		/* Build initial header template for transmit */
15578348SEric.Yu@Sun.COM 		rw_enter(&connp->conn_icmp->icmp_rwlock, RW_WRITER);
15588348SEric.Yu@Sun.COM 		if ((error = icmp_build_hdrs(connp->conn_icmp)) != 0) {
15598348SEric.Yu@Sun.COM 			rw_exit(&connp->conn_icmp->icmp_rwlock);
15608348SEric.Yu@Sun.COM 			inet_minor_free(ip_minor_arena_sa, connp->conn_dev);
15618348SEric.Yu@Sun.COM 			ipcl_conn_destroy(connp);
15628348SEric.Yu@Sun.COM 			return (error);
15638348SEric.Yu@Sun.COM 		}
15648348SEric.Yu@Sun.COM 		rw_exit(&connp->conn_icmp->icmp_rwlock);
15658348SEric.Yu@Sun.COM 	}
15668348SEric.Yu@Sun.COM 
15678348SEric.Yu@Sun.COM 
15688348SEric.Yu@Sun.COM 	q->q_hiwat = is->is_recv_hiwat;
15698348SEric.Yu@Sun.COM 	WR(q)->q_hiwat = is->is_xmit_hiwat;
15708348SEric.Yu@Sun.COM 	WR(q)->q_lowat = is->is_xmit_lowat;
15718348SEric.Yu@Sun.COM 
15728348SEric.Yu@Sun.COM 	qprocson(q);
15738348SEric.Yu@Sun.COM 
15748348SEric.Yu@Sun.COM 	/* Set the Stream head write offset. */
15758348SEric.Yu@Sun.COM 	(void) proto_set_tx_wroff(q, connp,
15768348SEric.Yu@Sun.COM 	    connp->conn_icmp->icmp_max_hdr_len + is->is_wroff_extra);
15778348SEric.Yu@Sun.COM 	(void) proto_set_rx_hiwat(connp->conn_rq, connp, q->q_hiwat);
15788348SEric.Yu@Sun.COM 
15798348SEric.Yu@Sun.COM 	mutex_enter(&connp->conn_lock);
15808348SEric.Yu@Sun.COM 	connp->conn_state_flags &= ~CONN_INCIPIENT;
15818348SEric.Yu@Sun.COM 	mutex_exit(&connp->conn_lock);
15828348SEric.Yu@Sun.COM 
15838348SEric.Yu@Sun.COM 	return (0);
15848348SEric.Yu@Sun.COM }
15858348SEric.Yu@Sun.COM 
15868348SEric.Yu@Sun.COM /* For /dev/icmp4 aka AF_INET open */
15878348SEric.Yu@Sun.COM static int
15888348SEric.Yu@Sun.COM icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
15898348SEric.Yu@Sun.COM {
15908348SEric.Yu@Sun.COM 	return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET));
15918348SEric.Yu@Sun.COM }
15928348SEric.Yu@Sun.COM 
15938348SEric.Yu@Sun.COM /* For /dev/icmp6 aka AF_INET6 open */
15948348SEric.Yu@Sun.COM static int
15958348SEric.Yu@Sun.COM icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
15968348SEric.Yu@Sun.COM {
15978348SEric.Yu@Sun.COM 	return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET6));
15988348SEric.Yu@Sun.COM }
15998348SEric.Yu@Sun.COM 
16008348SEric.Yu@Sun.COM /*
16018348SEric.Yu@Sun.COM  * This is the open routine for icmp.  It allocates a icmp_t structure for
16028348SEric.Yu@Sun.COM  * the stream and, on the first open of the module, creates an ND table.
16038348SEric.Yu@Sun.COM  */
16048348SEric.Yu@Sun.COM /* ARGSUSED */
16058348SEric.Yu@Sun.COM static conn_t *
16068348SEric.Yu@Sun.COM icmp_open(int family, cred_t *credp, int *err, int flags)
16078348SEric.Yu@Sun.COM {
16088348SEric.Yu@Sun.COM 	icmp_t	*icmp;
16098348SEric.Yu@Sun.COM 	conn_t *connp;
16108348SEric.Yu@Sun.COM 	zoneid_t zoneid;
16118348SEric.Yu@Sun.COM 	netstack_t *ns;
16128348SEric.Yu@Sun.COM 	icmp_stack_t *is;
16138348SEric.Yu@Sun.COM 	boolean_t isv6 = B_FALSE;
16148348SEric.Yu@Sun.COM 
16158348SEric.Yu@Sun.COM 	*err = secpolicy_net_icmpaccess(credp);
16168348SEric.Yu@Sun.COM 	if (*err != 0)
16178348SEric.Yu@Sun.COM 		return (NULL);
16188348SEric.Yu@Sun.COM 
16198348SEric.Yu@Sun.COM 	if (family == AF_INET6)
16208348SEric.Yu@Sun.COM 		isv6 = B_TRUE;
16213448Sdh155122 	ns = netstack_find_by_cred(credp);
16223448Sdh155122 	ASSERT(ns != NULL);
16233448Sdh155122 	is = ns->netstack_icmp;
16243448Sdh155122 	ASSERT(is != NULL);
16253448Sdh155122 
16263448Sdh155122 	/*
16273448Sdh155122 	 * For exclusive stacks we set the zoneid to zero
16283448Sdh155122 	 * to make ICMP operate as if in the global zone.
16293448Sdh155122 	 */
16305240Snordmark 	if (ns->netstack_stackid != GLOBAL_NETSTACKID)
16313448Sdh155122 		zoneid = GLOBAL_ZONEID;
16323448Sdh155122 	else
16333448Sdh155122 		zoneid = crgetzoneid(credp);
16343448Sdh155122 
16358348SEric.Yu@Sun.COM 	ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP);
16368348SEric.Yu@Sun.COM 
16378348SEric.Yu@Sun.COM 	connp = ipcl_conn_create(IPCL_RAWIPCONN, flags, ns);
16385240Snordmark 	icmp = connp->conn_icmp;
16398348SEric.Yu@Sun.COM 	icmp->icmp_v6dst = sin6_null;
16400Sstevel@tonic-gate 
16410Sstevel@tonic-gate 	/*
16425240Snordmark 	 * ipcl_conn_create did a netstack_hold. Undo the hold that was
16435240Snordmark 	 * done by netstack_find_by_cred()
16445240Snordmark 	 */
16455240Snordmark 	netstack_rele(ns);
16465240Snordmark 
16475240Snordmark 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
16485240Snordmark 	ASSERT(connp->conn_ulp == IPPROTO_ICMP);
16495240Snordmark 	ASSERT(connp->conn_icmp == icmp);
16505240Snordmark 	ASSERT(icmp->icmp_connp == connp);
16510Sstevel@tonic-gate 
16520Sstevel@tonic-gate 	/* Set the initial state of the stream and the privilege status. */
16530Sstevel@tonic-gate 	icmp->icmp_state = TS_UNBND;
16545240Snordmark 	if (isv6) {
16550Sstevel@tonic-gate 		icmp->icmp_ipversion = IPV6_VERSION;
16560Sstevel@tonic-gate 		icmp->icmp_family = AF_INET6;
16575240Snordmark 		connp->conn_ulp = IPPROTO_ICMPV6;
16580Sstevel@tonic-gate 		/* May be changed by a SO_PROTOTYPE socket option. */
16590Sstevel@tonic-gate 		icmp->icmp_proto = IPPROTO_ICMPV6;
16600Sstevel@tonic-gate 		icmp->icmp_checksum_off = 2;	/* Offset for icmp6_cksum */
16610Sstevel@tonic-gate 		icmp->icmp_max_hdr_len = IPV6_HDR_LEN;
16623448Sdh155122 		icmp->icmp_ttl = (uint8_t)is->is_ipv6_hoplimit;
16635240Snordmark 		connp->conn_af_isv6 = B_TRUE;
16645240Snordmark 		connp->conn_flags |= IPCL_ISV6;
16650Sstevel@tonic-gate 	} else {
16660Sstevel@tonic-gate 		icmp->icmp_ipversion = IPV4_VERSION;
16670Sstevel@tonic-gate 		icmp->icmp_family = AF_INET;
16680Sstevel@tonic-gate 		/* May be changed by a SO_PROTOTYPE socket option. */
16690Sstevel@tonic-gate 		icmp->icmp_proto = IPPROTO_ICMP;
16700Sstevel@tonic-gate 		icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH;
16713448Sdh155122 		icmp->icmp_ttl = (uint8_t)is->is_ipv4_ttl;
16725240Snordmark 		connp->conn_af_isv6 = B_FALSE;
16735240Snordmark 		connp->conn_flags &= ~IPCL_ISV6;
16740Sstevel@tonic-gate 	}
16755240Snordmark 	icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
16765240Snordmark 	icmp->icmp_pending_op = -1;
16775240Snordmark 	connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
16785240Snordmark 	connp->conn_zoneid = zoneid;
16790Sstevel@tonic-gate 
16800Sstevel@tonic-gate 	/*
16815240Snordmark 	 * If the caller has the process-wide flag set, then default to MAC
16825240Snordmark 	 * exempt mode.  This allows read-down to unlabeled hosts.
16830Sstevel@tonic-gate 	 */
16845240Snordmark 	if (getpflags(NET_MAC_AWARE, credp) != 0)
16856596Skp158701 		connp->conn_mac_exempt = B_TRUE;
16865240Snordmark 
16875240Snordmark 	connp->conn_ulp_labeled = is_system_labeled();
16885240Snordmark 
16895240Snordmark 	icmp->icmp_is = is;
16905240Snordmark 
16915240Snordmark 	connp->conn_recv = icmp_input;
16925240Snordmark 	crhold(credp);
16935240Snordmark 	connp->conn_cred = credp;
16945240Snordmark 
16955240Snordmark 	rw_exit(&icmp->icmp_rwlock);
16965240Snordmark 
16978348SEric.Yu@Sun.COM 	connp->conn_flow_cntrld = B_FALSE;
16988348SEric.Yu@Sun.COM 	return (connp);
16990Sstevel@tonic-gate }
17000Sstevel@tonic-gate 
17010Sstevel@tonic-gate /*
17020Sstevel@tonic-gate  * Which ICMP options OK to set through T_UNITDATA_REQ...
17030Sstevel@tonic-gate  */
17040Sstevel@tonic-gate /* ARGSUSED */
17050Sstevel@tonic-gate static boolean_t
17060Sstevel@tonic-gate icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
17070Sstevel@tonic-gate {
17080Sstevel@tonic-gate 	return (B_TRUE);
17090Sstevel@tonic-gate }
17100Sstevel@tonic-gate 
17110Sstevel@tonic-gate /*
17120Sstevel@tonic-gate  * This routine gets default values of certain options whose default
17130Sstevel@tonic-gate  * values are maintained by protcol specific code
17140Sstevel@tonic-gate  */
17150Sstevel@tonic-gate /* ARGSUSED */
17160Sstevel@tonic-gate int
17170Sstevel@tonic-gate icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr)
17180Sstevel@tonic-gate {
17195240Snordmark 	icmp_t *icmp = Q_TO_ICMP(q);
17203448Sdh155122 	icmp_stack_t *is = icmp->icmp_is;
17210Sstevel@tonic-gate 	int *i1 = (int *)ptr;
17220Sstevel@tonic-gate 
17230Sstevel@tonic-gate 	switch (level) {
17240Sstevel@tonic-gate 	case IPPROTO_IP:
17250Sstevel@tonic-gate 		switch (name) {
17260Sstevel@tonic-gate 		case IP_MULTICAST_TTL:
17270Sstevel@tonic-gate 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
17280Sstevel@tonic-gate 			return (sizeof (uchar_t));
17290Sstevel@tonic-gate 		case IP_MULTICAST_LOOP:
17300Sstevel@tonic-gate 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
17310Sstevel@tonic-gate 			return (sizeof (uchar_t));
17320Sstevel@tonic-gate 		}
17330Sstevel@tonic-gate 		break;
17340Sstevel@tonic-gate 	case IPPROTO_IPV6:
17350Sstevel@tonic-gate 		switch (name) {
17360Sstevel@tonic-gate 		case IPV6_MULTICAST_HOPS:
17370Sstevel@tonic-gate 			*i1 = IP_DEFAULT_MULTICAST_TTL;
17380Sstevel@tonic-gate 			return (sizeof (int));
17390Sstevel@tonic-gate 		case IPV6_MULTICAST_LOOP:
17400Sstevel@tonic-gate 			*i1 = IP_DEFAULT_MULTICAST_LOOP;
17410Sstevel@tonic-gate 			return (sizeof (int));
17420Sstevel@tonic-gate 		case IPV6_UNICAST_HOPS:
17433448Sdh155122 			*i1 = is->is_ipv6_hoplimit;
17440Sstevel@tonic-gate 			return (sizeof (int));
17450Sstevel@tonic-gate 		}
17460Sstevel@tonic-gate 		break;
17470Sstevel@tonic-gate 	case IPPROTO_ICMPV6:
17480Sstevel@tonic-gate 		switch (name) {
17490Sstevel@tonic-gate 		case ICMP6_FILTER:
17500Sstevel@tonic-gate 			/* Make it look like "pass all" */
17510Sstevel@tonic-gate 			ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr);
17520Sstevel@tonic-gate 			return (sizeof (icmp6_filter_t));
17530Sstevel@tonic-gate 		}
17540Sstevel@tonic-gate 		break;
17550Sstevel@tonic-gate 	}
17560Sstevel@tonic-gate 	return (-1);
17570Sstevel@tonic-gate }
17580Sstevel@tonic-gate 
17590Sstevel@tonic-gate /*
17600Sstevel@tonic-gate  * This routine retrieves the current status of socket options.
17610Sstevel@tonic-gate  * It returns the size of the option retrieved.
17620Sstevel@tonic-gate  */
17630Sstevel@tonic-gate int
17648348SEric.Yu@Sun.COM icmp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
17650Sstevel@tonic-gate {
17668348SEric.Yu@Sun.COM 	icmp_t		*icmp = connp->conn_icmp;
17678348SEric.Yu@Sun.COM 	icmp_stack_t	*is = icmp->icmp_is;
17688348SEric.Yu@Sun.COM 	int		*i1 = (int *)ptr;
17690Sstevel@tonic-gate 	ip6_pkt_t	*ipp = &icmp->icmp_sticky_ipp;
17708348SEric.Yu@Sun.COM 	int		ret = 0;
17718348SEric.Yu@Sun.COM 
17728348SEric.Yu@Sun.COM 	ASSERT(RW_READ_HELD(&icmp->icmp_rwlock));
17730Sstevel@tonic-gate 	switch (level) {
17740Sstevel@tonic-gate 	case SOL_SOCKET:
17750Sstevel@tonic-gate 		switch (name) {
17760Sstevel@tonic-gate 		case SO_DEBUG:
17770Sstevel@tonic-gate 			*i1 = icmp->icmp_debug;
17780Sstevel@tonic-gate 			break;
17790Sstevel@tonic-gate 		case SO_TYPE:
17800Sstevel@tonic-gate 			*i1 = SOCK_RAW;
17810Sstevel@tonic-gate 			break;
17820Sstevel@tonic-gate 		case SO_PROTOTYPE:
17830Sstevel@tonic-gate 			*i1 = icmp->icmp_proto;
17840Sstevel@tonic-gate 			break;
17850Sstevel@tonic-gate 		case SO_REUSEADDR:
17860Sstevel@tonic-gate 			*i1 = icmp->icmp_reuseaddr;
17870Sstevel@tonic-gate 			break;
17880Sstevel@tonic-gate 
17890Sstevel@tonic-gate 		/*
17900Sstevel@tonic-gate 		 * The following three items are available here,
17910Sstevel@tonic-gate 		 * but are only meaningful to IP.
17920Sstevel@tonic-gate 		 */
17930Sstevel@tonic-gate 		case SO_DONTROUTE:
17940Sstevel@tonic-gate 			*i1 = icmp->icmp_dontroute;
17950Sstevel@tonic-gate 			break;
17960Sstevel@tonic-gate 		case SO_USELOOPBACK:
17970Sstevel@tonic-gate 			*i1 = icmp->icmp_useloopback;
17980Sstevel@tonic-gate 			break;
17990Sstevel@tonic-gate 		case SO_BROADCAST:
18000Sstevel@tonic-gate 			*i1 = icmp->icmp_broadcast;
18010Sstevel@tonic-gate 			break;
18020Sstevel@tonic-gate 
18030Sstevel@tonic-gate 		case SO_SNDBUF:
18048348SEric.Yu@Sun.COM 			ASSERT(icmp->icmp_xmit_hiwat <= INT_MAX);
18058348SEric.Yu@Sun.COM 			*i1 = icmp->icmp_xmit_hiwat;
18060Sstevel@tonic-gate 			break;
18070Sstevel@tonic-gate 		case SO_RCVBUF:
18088348SEric.Yu@Sun.COM 			ASSERT(icmp->icmp_recv_hiwat <= INT_MAX);
18098348SEric.Yu@Sun.COM 			*i1 = icmp->icmp_recv_hiwat;
18100Sstevel@tonic-gate 			break;
18110Sstevel@tonic-gate 		case SO_DGRAM_ERRIND:
18120Sstevel@tonic-gate 			*i1 = icmp->icmp_dgram_errind;
18130Sstevel@tonic-gate 			break;
18141673Sgt145670 		case SO_TIMESTAMP:
18151673Sgt145670 			*i1 = icmp->icmp_timestamp;
18161673Sgt145670 			break;
18171676Sjpk 		case SO_MAC_EXEMPT:
18186596Skp158701 			*i1 = connp->conn_mac_exempt;
18191676Sjpk 			break;
18203388Skcpoon 		case SO_DOMAIN:
18213388Skcpoon 			*i1 = icmp->icmp_family;
18223388Skcpoon 			break;
18233388Skcpoon 
18240Sstevel@tonic-gate 		/*
18252263Ssommerfe 		 * Following four not meaningful for icmp
18260Sstevel@tonic-gate 		 * Action is same as "default" to which we fallthrough
18270Sstevel@tonic-gate 		 * so we keep them in comments.
18280Sstevel@tonic-gate 		 * case SO_LINGER:
18290Sstevel@tonic-gate 		 * case SO_KEEPALIVE:
18300Sstevel@tonic-gate 		 * case SO_OOBINLINE:
18312263Ssommerfe 		 * case SO_ALLZONES:
18320Sstevel@tonic-gate 		 */
18330Sstevel@tonic-gate 		default:
18348348SEric.Yu@Sun.COM 			ret = -1;
18358348SEric.Yu@Sun.COM 			goto done;
18360Sstevel@tonic-gate 		}
18370Sstevel@tonic-gate 		break;
18380Sstevel@tonic-gate 	case IPPROTO_IP:
18390Sstevel@tonic-gate 		/*
18400Sstevel@tonic-gate 		 * Only allow IPv4 option processing on IPv4 sockets.
18410Sstevel@tonic-gate 		 */
18428348SEric.Yu@Sun.COM 		if (icmp->icmp_family != AF_INET) {
18438348SEric.Yu@Sun.COM 			ret = -1;
18448348SEric.Yu@Sun.COM 			goto done;
18458348SEric.Yu@Sun.COM 		}
18460Sstevel@tonic-gate 
18470Sstevel@tonic-gate 		switch (name) {
18480Sstevel@tonic-gate 		case IP_OPTIONS:
18490Sstevel@tonic-gate 		case T_IP_OPTIONS:
18500Sstevel@tonic-gate 			/* Options are passed up with each packet */
18518348SEric.Yu@Sun.COM 			ret = 0;
18528348SEric.Yu@Sun.COM 			goto done;
18530Sstevel@tonic-gate 		case IP_HDRINCL:
18540Sstevel@tonic-gate 			*i1 = (int)icmp->icmp_hdrincl;
18550Sstevel@tonic-gate 			break;
18560Sstevel@tonic-gate 		case IP_TOS:
18570Sstevel@tonic-gate 		case T_IP_TOS:
18580Sstevel@tonic-gate 			*i1 = (int)icmp->icmp_type_of_service;
18590Sstevel@tonic-gate 			break;
18600Sstevel@tonic-gate 		case IP_TTL:
18610Sstevel@tonic-gate 			*i1 = (int)icmp->icmp_ttl;
18620Sstevel@tonic-gate 			break;
18630Sstevel@tonic-gate 		case IP_MULTICAST_IF:
18640Sstevel@tonic-gate 			/* 0 address if not set */
18650Sstevel@tonic-gate 			*(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr;
18668348SEric.Yu@Sun.COM 			ret = sizeof (ipaddr_t);
18678348SEric.Yu@Sun.COM 			goto done;
18680Sstevel@tonic-gate 		case IP_MULTICAST_TTL:
18690Sstevel@tonic-gate 			*(uchar_t *)ptr = icmp->icmp_multicast_ttl;
18708348SEric.Yu@Sun.COM 			ret = sizeof (uchar_t);
18718348SEric.Yu@Sun.COM 			goto done;
18720Sstevel@tonic-gate 		case IP_MULTICAST_LOOP:
18735240Snordmark 			*ptr = connp->conn_multicast_loop;
18748348SEric.Yu@Sun.COM 			ret = sizeof (uint8_t);
18758348SEric.Yu@Sun.COM 			goto done;
18760Sstevel@tonic-gate 		case IP_BOUND_IF:
18770Sstevel@tonic-gate 			/* Zero if not set */
18780Sstevel@tonic-gate 			*i1 = icmp->icmp_bound_if;
18790Sstevel@tonic-gate 			break;	/* goto sizeof (int) option return */
18800Sstevel@tonic-gate 		case IP_UNSPEC_SRC:
18810Sstevel@tonic-gate 			*ptr = icmp->icmp_unspec_source;
18820Sstevel@tonic-gate 			break;	/* goto sizeof (int) option return */
18838348SEric.Yu@Sun.COM 		case IP_RECVIF:
18848348SEric.Yu@Sun.COM 			*ptr = icmp->icmp_recvif;
18858348SEric.Yu@Sun.COM 			break;	/* goto sizeof (int) option return */
18865455Smeem 		case IP_BROADCAST_TTL:
18875455Smeem 			*(uchar_t *)ptr = connp->conn_broadcast_ttl;
18885455Smeem 			return (sizeof (uchar_t));
18893318Srshoaib 		case IP_RECVPKTINFO:
18903318Srshoaib 			/*
18913318Srshoaib 			 * This also handles IP_PKTINFO.
18923318Srshoaib 			 * IP_PKTINFO and IP_RECVPKTINFO have the same value.
18933318Srshoaib 			 * Differentiation is based on the size of the argument
18943318Srshoaib 			 * passed in.
18953318Srshoaib 			 * This option is handled in IP which will return an
18963318Srshoaib 			 * error for IP_PKTINFO as it's not supported as a
18973318Srshoaib 			 * sticky option.
18983318Srshoaib 			 */
18998348SEric.Yu@Sun.COM 			ret = -EINVAL;
19008348SEric.Yu@Sun.COM 			goto done;
19010Sstevel@tonic-gate 		/*
19020Sstevel@tonic-gate 		 * Cannot "get" the value of following options
19030Sstevel@tonic-gate 		 * at this level. Action is same as "default" to
19040Sstevel@tonic-gate 		 * which we fallthrough so we keep them in comments.
19050Sstevel@tonic-gate 		 *
19060Sstevel@tonic-gate 		 * case IP_ADD_MEMBERSHIP:
19070Sstevel@tonic-gate 		 * case IP_DROP_MEMBERSHIP:
19080Sstevel@tonic-gate 		 * case IP_BLOCK_SOURCE:
19090Sstevel@tonic-gate 		 * case IP_UNBLOCK_SOURCE:
19100Sstevel@tonic-gate 		 * case IP_ADD_SOURCE_MEMBERSHIP:
19110Sstevel@tonic-gate 		 * case IP_DROP_SOURCE_MEMBERSHIP:
19120Sstevel@tonic-gate 		 * case MCAST_JOIN_GROUP:
19130Sstevel@tonic-gate 		 * case MCAST_LEAVE_GROUP:
19140Sstevel@tonic-gate 		 * case MCAST_BLOCK_SOURCE:
19150Sstevel@tonic-gate 		 * case MCAST_UNBLOCK_SOURCE:
19160Sstevel@tonic-gate 		 * case MCAST_JOIN_SOURCE_GROUP:
19170Sstevel@tonic-gate 		 * case MCAST_LEAVE_SOURCE_GROUP:
19180Sstevel@tonic-gate 		 * case MRT_INIT:
19190Sstevel@tonic-gate 		 * case MRT_DONE:
19200Sstevel@tonic-gate 		 * case MRT_ADD_VIF:
19210Sstevel@tonic-gate 		 * case MRT_DEL_VIF:
19220Sstevel@tonic-gate 		 * case MRT_ADD_MFC:
19230Sstevel@tonic-gate 		 * case MRT_DEL_MFC:
19240Sstevel@tonic-gate 		 * case MRT_VERSION:
19250Sstevel@tonic-gate 		 * case MRT_ASSERT:
19260Sstevel@tonic-gate 		 * case IP_SEC_OPT:
19271663Spriyanka 		 * case IP_NEXTHOP:
19280Sstevel@tonic-gate 		 */
19290Sstevel@tonic-gate 		default:
19308348SEric.Yu@Sun.COM 			ret = -1;
19318348SEric.Yu@Sun.COM 			goto done;
19320Sstevel@tonic-gate 		}
19330Sstevel@tonic-gate 		break;
19340Sstevel@tonic-gate 	case IPPROTO_IPV6:
19350Sstevel@tonic-gate 		/*
19360Sstevel@tonic-gate 		 * Only allow IPv6 option processing on native IPv6 sockets.
19370Sstevel@tonic-gate 		 */
19388348SEric.Yu@Sun.COM 		if (icmp->icmp_family != AF_INET6) {
19398348SEric.Yu@Sun.COM 			ret = -1;
19408348SEric.Yu@Sun.COM 			goto done;
19418348SEric.Yu@Sun.COM 		}
19420Sstevel@tonic-gate 		switch (name) {
19430Sstevel@tonic-gate 		case IPV6_UNICAST_HOPS:
19440Sstevel@tonic-gate 			*i1 = (unsigned int)icmp->icmp_ttl;
19450Sstevel@tonic-gate 			break;
19460Sstevel@tonic-gate 		case IPV6_MULTICAST_IF:
19470Sstevel@tonic-gate 			/* 0 index if not set */
19480Sstevel@tonic-gate 			*i1 = icmp->icmp_multicast_if_index;
19490Sstevel@tonic-gate 			break;
19500Sstevel@tonic-gate 		case IPV6_MULTICAST_HOPS:
19510Sstevel@tonic-gate 			*i1 = icmp->icmp_multicast_ttl;
19520Sstevel@tonic-gate 			break;
19530Sstevel@tonic-gate 		case IPV6_MULTICAST_LOOP:
19545240Snordmark 			*i1 = connp->conn_multicast_loop;
19550Sstevel@tonic-gate 			break;
19560Sstevel@tonic-gate 		case IPV6_BOUND_IF:
19570Sstevel@tonic-gate 			/* Zero if not set */
19580Sstevel@tonic-gate 			*i1 = icmp->icmp_bound_if;
19590Sstevel@tonic-gate 			break;
19600Sstevel@tonic-gate 		case IPV6_UNSPEC_SRC:
19610Sstevel@tonic-gate 			*i1 = icmp->icmp_unspec_source;
19620Sstevel@tonic-gate 			break;
19630Sstevel@tonic-gate 		case IPV6_CHECKSUM:
19640Sstevel@tonic-gate 			/*
19650Sstevel@tonic-gate 			 * Return offset or -1 if no checksum offset.
19660Sstevel@tonic-gate 			 * Does not apply to IPPROTO_ICMPV6
19670Sstevel@tonic-gate 			 */
19688348SEric.Yu@Sun.COM 			if (icmp->icmp_proto == IPPROTO_ICMPV6) {
19698348SEric.Yu@Sun.COM 				ret = -1;
19708348SEric.Yu@Sun.COM 				goto done;
19718348SEric.Yu@Sun.COM 			}
19720Sstevel@tonic-gate 
19730Sstevel@tonic-gate 			if (icmp->icmp_raw_checksum) {
19740Sstevel@tonic-gate 				*i1 = icmp->icmp_checksum_off;
19750Sstevel@tonic-gate 			} else {
19760Sstevel@tonic-gate 				*i1 = -1;
19770Sstevel@tonic-gate 			}
19780Sstevel@tonic-gate 			break;
19790Sstevel@tonic-gate 		case IPV6_JOIN_GROUP:
19800Sstevel@tonic-gate 		case IPV6_LEAVE_GROUP:
19810Sstevel@tonic-gate 		case MCAST_JOIN_GROUP:
19820Sstevel@tonic-gate 		case MCAST_LEAVE_GROUP:
19830Sstevel@tonic-gate 		case MCAST_BLOCK_SOURCE:
19840Sstevel@tonic-gate 		case MCAST_UNBLOCK_SOURCE:
19850Sstevel@tonic-gate 		case MCAST_JOIN_SOURCE_GROUP:
19860Sstevel@tonic-gate 		case MCAST_LEAVE_SOURCE_GROUP:
19870Sstevel@tonic-gate 			/* cannot "get" the value for these */
19888348SEric.Yu@Sun.COM 			ret = -1;
19898348SEric.Yu@Sun.COM 			goto done;
19900Sstevel@tonic-gate 		case IPV6_RECVPKTINFO:
19913318Srshoaib 			*i1 = icmp->icmp_ip_recvpktinfo;
19920Sstevel@tonic-gate 			break;
19930Sstevel@tonic-gate 		case IPV6_RECVTCLASS:
19940Sstevel@tonic-gate 			*i1 = icmp->icmp_ipv6_recvtclass;
19950Sstevel@tonic-gate 			break;
19960Sstevel@tonic-gate 		case IPV6_RECVPATHMTU:
19970Sstevel@tonic-gate 			*i1 = icmp->icmp_ipv6_recvpathmtu;
19980Sstevel@tonic-gate 			break;
19990Sstevel@tonic-gate 		case IPV6_V6ONLY:
20000Sstevel@tonic-gate 			*i1 = 1;
20010Sstevel@tonic-gate 			break;
20020Sstevel@tonic-gate 		case IPV6_RECVHOPLIMIT:
20030Sstevel@tonic-gate 			*i1 = icmp->icmp_ipv6_recvhoplimit;
20040Sstevel@tonic-gate 			break;
20050Sstevel@tonic-gate 		case IPV6_RECVHOPOPTS:
20060Sstevel@tonic-gate 			*i1 = icmp->icmp_ipv6_recvhopopts;
20070Sstevel@tonic-gate 			break;
20080Sstevel@tonic-gate 		case IPV6_RECVDSTOPTS:
20090Sstevel@tonic-gate 			*i1 = icmp->icmp_ipv6_recvdstopts;
20100Sstevel@tonic-gate 			break;
20110Sstevel@tonic-gate 		case _OLD_IPV6_RECVDSTOPTS:
20120Sstevel@tonic-gate 			*i1 = icmp->icmp_old_ipv6_recvdstopts;
20130Sstevel@tonic-gate 			break;
20140Sstevel@tonic-gate 		case IPV6_RECVRTHDRDSTOPTS:
20150Sstevel@tonic-gate 			*i1 = icmp->icmp_ipv6_recvrtdstopts;
20160Sstevel@tonic-gate 			break;
20170Sstevel@tonic-gate 		case IPV6_RECVRTHDR:
20180Sstevel@tonic-gate 			*i1 = icmp->icmp_ipv6_recvrthdr;
20190Sstevel@tonic-gate 			break;
20200Sstevel@tonic-gate 		case IPV6_PKTINFO: {
20210Sstevel@tonic-gate 			/* XXX assumes that caller has room for max size! */
20220Sstevel@tonic-gate 			struct in6_pktinfo *pkti;
20230Sstevel@tonic-gate 
20240Sstevel@tonic-gate 			pkti = (struct in6_pktinfo *)ptr;
20250Sstevel@tonic-gate 			if (ipp->ipp_fields & IPPF_IFINDEX)
20260Sstevel@tonic-gate 				pkti->ipi6_ifindex = ipp->ipp_ifindex;
20270Sstevel@tonic-gate 			else
20280Sstevel@tonic-gate 				pkti->ipi6_ifindex = 0;
20290Sstevel@tonic-gate 			if (ipp->ipp_fields & IPPF_ADDR)
20300Sstevel@tonic-gate 				pkti->ipi6_addr = ipp->ipp_addr;
20310Sstevel@tonic-gate 			else
20320Sstevel@tonic-gate 				pkti->ipi6_addr = ipv6_all_zeros;
20338348SEric.Yu@Sun.COM 			ret = sizeof (struct in6_pktinfo);
20348348SEric.Yu@Sun.COM 			goto done;
20350Sstevel@tonic-gate 		}
20360Sstevel@tonic-gate 		case IPV6_NEXTHOP: {
20370Sstevel@tonic-gate 			sin6_t *sin6 = (sin6_t *)ptr;
20380Sstevel@tonic-gate 
20390Sstevel@tonic-gate 			if (!(ipp->ipp_fields & IPPF_NEXTHOP))
20400Sstevel@tonic-gate 				return (0);
20410Sstevel@tonic-gate 			*sin6 = sin6_null;
20420Sstevel@tonic-gate 			sin6->sin6_family = AF_INET6;
20430Sstevel@tonic-gate 			sin6->sin6_addr = ipp->ipp_nexthop;
20448348SEric.Yu@Sun.COM 			ret = (sizeof (sin6_t));
20458348SEric.Yu@Sun.COM 			goto done;
20460Sstevel@tonic-gate 		}
20470Sstevel@tonic-gate 		case IPV6_HOPOPTS:
20480Sstevel@tonic-gate 			if (!(ipp->ipp_fields & IPPF_HOPOPTS))
20490Sstevel@tonic-gate 				return (0);
20501676Sjpk 			if (ipp->ipp_hopoptslen <= icmp->icmp_label_len_v6)
20511676Sjpk 				return (0);
20521676Sjpk 			bcopy((char *)ipp->ipp_hopopts +
20531676Sjpk 			    icmp->icmp_label_len_v6, ptr,
20541676Sjpk 			    ipp->ipp_hopoptslen - icmp->icmp_label_len_v6);
20551676Sjpk 			if (icmp->icmp_label_len_v6 > 0) {
20561676Sjpk 				ptr[0] = ((char *)ipp->ipp_hopopts)[0];
20571676Sjpk 				ptr[1] = (ipp->ipp_hopoptslen -
20581676Sjpk 				    icmp->icmp_label_len_v6 + 7) / 8 - 1;
20591676Sjpk 			}
20608348SEric.Yu@Sun.COM 			ret = (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6);
20618348SEric.Yu@Sun.COM 			goto done;
20620Sstevel@tonic-gate 		case IPV6_RTHDRDSTOPTS:
20630Sstevel@tonic-gate 			if (!(ipp->ipp_fields & IPPF_RTDSTOPTS))
20640Sstevel@tonic-gate 				return (0);
20650Sstevel@tonic-gate 			bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen);
20668348SEric.Yu@Sun.COM 			ret = ipp->ipp_rtdstoptslen;
20678348SEric.Yu@Sun.COM 			goto done;
20680Sstevel@tonic-gate 		case IPV6_RTHDR:
20690Sstevel@tonic-gate 			if (!(ipp->ipp_fields & IPPF_RTHDR))
20700Sstevel@tonic-gate 				return (0);
20710Sstevel@tonic-gate 			bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
20728348SEric.Yu@Sun.COM 			ret = ipp->ipp_rthdrlen;
20738348SEric.Yu@Sun.COM 			goto done;
20740Sstevel@tonic-gate 		case IPV6_DSTOPTS:
20758348SEric.Yu@Sun.COM 			if (!(ipp->ipp_fields & IPPF_DSTOPTS)) {
20768348SEric.Yu@Sun.COM 				ret = 0;
20778348SEric.Yu@Sun.COM 				goto done;
20788348SEric.Yu@Sun.COM 			}
20790Sstevel@tonic-gate 			bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
20808348SEric.Yu@Sun.COM 			ret = ipp->ipp_dstoptslen;
20818348SEric.Yu@Sun.COM 			goto done;
20820Sstevel@tonic-gate 		case IPV6_PATHMTU:
20838348SEric.Yu@Sun.COM 			if (!(ipp->ipp_fields & IPPF_PATHMTU)) {
20848348SEric.Yu@Sun.COM 				ret = 0;
20858348SEric.Yu@Sun.COM 			} else {
20868348SEric.Yu@Sun.COM 				ret = ip_fill_mtuinfo(
20878348SEric.Yu@Sun.COM 				    &icmp->icmp_v6dst.sin6_addr, 0,
20888348SEric.Yu@Sun.COM 				    (struct ip6_mtuinfo *)ptr,
20898348SEric.Yu@Sun.COM 				    is->is_netstack);
20908348SEric.Yu@Sun.COM 			}
20918348SEric.Yu@Sun.COM 			goto done;
20920Sstevel@tonic-gate 		case IPV6_TCLASS:
20930Sstevel@tonic-gate 			if (ipp->ipp_fields & IPPF_TCLASS)
20940Sstevel@tonic-gate 				*i1 = ipp->ipp_tclass;
20950Sstevel@tonic-gate 			else
20960Sstevel@tonic-gate 				*i1 = IPV6_FLOW_TCLASS(
20970Sstevel@tonic-gate 				    IPV6_DEFAULT_VERS_AND_FLOW);
20980Sstevel@tonic-gate 			break;
20990Sstevel@tonic-gate 		default:
21008348SEric.Yu@Sun.COM 			ret = -1;
21018348SEric.Yu@Sun.COM 			goto done;
21020Sstevel@tonic-gate 		}
21030Sstevel@tonic-gate 		break;
21040Sstevel@tonic-gate 	case IPPROTO_ICMPV6:
21050Sstevel@tonic-gate 		/*
21060Sstevel@tonic-gate 		 * Only allow IPv6 option processing on native IPv6 sockets.
21070Sstevel@tonic-gate 		 */
21088348SEric.Yu@Sun.COM 		if (icmp->icmp_family != AF_INET6) {
21098348SEric.Yu@Sun.COM 			ret = -1;
21108348SEric.Yu@Sun.COM 		}
21118348SEric.Yu@Sun.COM 
21128348SEric.Yu@Sun.COM 		if (icmp->icmp_proto != IPPROTO_ICMPV6) {
21138348SEric.Yu@Sun.COM 			ret = -1;
21148348SEric.Yu@Sun.COM 		}
21150Sstevel@tonic-gate 
21160Sstevel@tonic-gate 		switch (name) {
21170Sstevel@tonic-gate 		case ICMP6_FILTER:
21180Sstevel@tonic-gate 			if (icmp->icmp_filter == NULL) {
21190Sstevel@tonic-gate 				/* Make it look like "pass all" */
21200Sstevel@tonic-gate 				ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr);
21210Sstevel@tonic-gate 			} else {
21220Sstevel@tonic-gate 				(void) bcopy(icmp->icmp_filter, ptr,
21230Sstevel@tonic-gate 				    sizeof (icmp6_filter_t));
21240Sstevel@tonic-gate 			}
21258348SEric.Yu@Sun.COM 			ret = sizeof (icmp6_filter_t);
21268348SEric.Yu@Sun.COM 			goto done;
21270Sstevel@tonic-gate 		default:
21288348SEric.Yu@Sun.COM 			ret = -1;
21298348SEric.Yu@Sun.COM 			goto done;
21300Sstevel@tonic-gate 		}
21310Sstevel@tonic-gate 	default:
21328348SEric.Yu@Sun.COM 		ret = -1;
21338348SEric.Yu@Sun.COM 		goto done;
21348348SEric.Yu@Sun.COM 	}
21358348SEric.Yu@Sun.COM 	ret = sizeof (int);
21368348SEric.Yu@Sun.COM done:
21378348SEric.Yu@Sun.COM 	return (ret);
21380Sstevel@tonic-gate }
21390Sstevel@tonic-gate 
21405240Snordmark /*
21415240Snordmark  * This routine retrieves the current status of socket options.
21425240Snordmark  * It returns the size of the option retrieved.
21435240Snordmark  */
21445240Snordmark int
21458348SEric.Yu@Sun.COM icmp_tpi_opt_get(queue_t *q, int level, int name, uchar_t *ptr)
21465240Snordmark {
21478348SEric.Yu@Sun.COM 	conn_t  *connp = Q_TO_CONN(q);
21488348SEric.Yu@Sun.COM 	icmp_t	*icmp = connp->conn_icmp;
21495240Snordmark 	int 	err;
21505240Snordmark 
21515240Snordmark 	rw_enter(&icmp->icmp_rwlock, RW_READER);
21528348SEric.Yu@Sun.COM 	err = icmp_opt_get(connp, level, name, ptr);
21535240Snordmark 	rw_exit(&icmp->icmp_rwlock);
21545240Snordmark 	return (err);
21555240Snordmark }
21565240Snordmark 
21570Sstevel@tonic-gate int
21588348SEric.Yu@Sun.COM icmp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen,
21598348SEric.Yu@Sun.COM     uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr,
21608348SEric.Yu@Sun.COM     void *thisdg_attrs, boolean_t checkonly)
21610Sstevel@tonic-gate {
21628348SEric.Yu@Sun.COM 
21630Sstevel@tonic-gate 	int	*i1 = (int *)invalp;
21640Sstevel@tonic-gate 	boolean_t onoff = (*i1 == 0) ? 0 : 1;
21658348SEric.Yu@Sun.COM 	icmp_t *icmp = connp->conn_icmp;
21668348SEric.Yu@Sun.COM 	icmp_stack_t *is = icmp->icmp_is;
21670Sstevel@tonic-gate 	int	error;
21680Sstevel@tonic-gate 
21698348SEric.Yu@Sun.COM 	ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock));
21700Sstevel@tonic-gate 	/*
21710Sstevel@tonic-gate 	 * For fixed length options, no sanity check
21720Sstevel@tonic-gate 	 * of passed in length is done. It is assumed *_optcom_req()
21730Sstevel@tonic-gate 	 * routines do the right thing.
21740Sstevel@tonic-gate 	 */
21750Sstevel@tonic-gate 	switch (level) {
21760Sstevel@tonic-gate 	case SOL_SOCKET:
21770Sstevel@tonic-gate 		switch (name) {
21780Sstevel@tonic-gate 		case SO_DEBUG:
21790Sstevel@tonic-gate 			if (!checkonly)
21800Sstevel@tonic-gate 				icmp->icmp_debug = onoff;
21810Sstevel@tonic-gate 			break;
21820Sstevel@tonic-gate 		case SO_PROTOTYPE:
21830Sstevel@tonic-gate 			if ((*i1 & 0xFF) != IPPROTO_ICMP &&
21840Sstevel@tonic-gate 			    (*i1 & 0xFF) != IPPROTO_ICMPV6 &&
21850Sstevel@tonic-gate 			    secpolicy_net_rawaccess(cr) != 0) {
21860Sstevel@tonic-gate 				*outlenp = 0;
21870Sstevel@tonic-gate 				return (EACCES);
21880Sstevel@tonic-gate 			}
21890Sstevel@tonic-gate 			/* Can't use IPPROTO_RAW with IPv6 */
21900Sstevel@tonic-gate 			if ((*i1 & 0xFF) == IPPROTO_RAW &&
21910Sstevel@tonic-gate 			    icmp->icmp_family == AF_INET6) {
21920Sstevel@tonic-gate 				*outlenp = 0;
21930Sstevel@tonic-gate 				return (EPROTONOSUPPORT);
21940Sstevel@tonic-gate 			}
21950Sstevel@tonic-gate 			if (checkonly) {
21960Sstevel@tonic-gate 				/* T_CHECK case */
21970Sstevel@tonic-gate 				*(int *)outvalp = (*i1 & 0xFF);
21980Sstevel@tonic-gate 				break;
21990Sstevel@tonic-gate 			}
22000Sstevel@tonic-gate 			icmp->icmp_proto = *i1 & 0xFF;
22010Sstevel@tonic-gate 			if ((icmp->icmp_proto == IPPROTO_RAW ||
22020Sstevel@tonic-gate 			    icmp->icmp_proto == IPPROTO_IGMP) &&
22030Sstevel@tonic-gate 			    icmp->icmp_family == AF_INET)
22040Sstevel@tonic-gate 				icmp->icmp_hdrincl = 1;
22050Sstevel@tonic-gate 			else
22060Sstevel@tonic-gate 				icmp->icmp_hdrincl = 0;
22070Sstevel@tonic-gate 
22080Sstevel@tonic-gate 			if (icmp->icmp_family == AF_INET6 &&
22090Sstevel@tonic-gate 			    icmp->icmp_proto == IPPROTO_ICMPV6) {
22100Sstevel@tonic-gate 				/* Set offset for icmp6_cksum */
22110Sstevel@tonic-gate 				icmp->icmp_raw_checksum = 0;
22120Sstevel@tonic-gate 				icmp->icmp_checksum_off = 2;
22130Sstevel@tonic-gate 			}
22140Sstevel@tonic-gate 			if (icmp->icmp_proto == IPPROTO_UDP ||
22150Sstevel@tonic-gate 			    icmp->icmp_proto == IPPROTO_TCP ||
22160Sstevel@tonic-gate 			    icmp->icmp_proto == IPPROTO_SCTP) {
22170Sstevel@tonic-gate 				icmp->icmp_no_tp_cksum = 1;
22180Sstevel@tonic-gate 				icmp->icmp_sticky_ipp.ipp_fields |=
22190Sstevel@tonic-gate 				    IPPF_NO_CKSUM;
22200Sstevel@tonic-gate 			} else {
22210Sstevel@tonic-gate 				icmp->icmp_no_tp_cksum = 0;
22220Sstevel@tonic-gate 				icmp->icmp_sticky_ipp.ipp_fields &=
22230Sstevel@tonic-gate 				    ~IPPF_NO_CKSUM;
22240Sstevel@tonic-gate 			}
22250Sstevel@tonic-gate 
22260Sstevel@tonic-gate 			if (icmp->icmp_filter != NULL &&
22270Sstevel@tonic-gate 			    icmp->icmp_proto != IPPROTO_ICMPV6) {
22280Sstevel@tonic-gate 				kmem_free(icmp->icmp_filter,
22290Sstevel@tonic-gate 				    sizeof (icmp6_filter_t));
22300Sstevel@tonic-gate 				icmp->icmp_filter = NULL;
22310Sstevel@tonic-gate 			}
22320Sstevel@tonic-gate 
22330Sstevel@tonic-gate 			/* Rebuild the header template */
22345240Snordmark 			error = icmp_build_hdrs(icmp);
22350Sstevel@tonic-gate 			if (error != 0) {
22360Sstevel@tonic-gate 				*outlenp = 0;
22370Sstevel@tonic-gate 				return (error);
22380Sstevel@tonic-gate 			}
22390Sstevel@tonic-gate 
2240409Skcpoon 			/*
2241409Skcpoon 			 * For SCTP, we don't use icmp_bind_proto() for
2242409Skcpoon 			 * raw socket binding.  Note that we do not need
2243409Skcpoon 			 * to set *outlenp.
22445240Snordmark 			 * FIXME: how does SCTP work?
2245409Skcpoon 			 */
2246409Skcpoon 			if (icmp->icmp_proto == IPPROTO_SCTP)
2247409Skcpoon 				return (0);
2248409Skcpoon 
22490Sstevel@tonic-gate 			*outlenp = sizeof (int);
22500Sstevel@tonic-gate 			*(int *)outvalp = *i1 & 0xFF;
22515240Snordmark 
22525240Snordmark 			/* Drop lock across the bind operation */
22535240Snordmark 			rw_exit(&icmp->icmp_rwlock);
22548348SEric.Yu@Sun.COM 			(void) icmp_bind_proto(connp);
22555240Snordmark 			rw_enter(&icmp->icmp_rwlock, RW_WRITER);
22560Sstevel@tonic-gate 			return (0);
22570Sstevel@tonic-gate 		case SO_REUSEADDR:
22588348SEric.Yu@Sun.COM 			if (!checkonly) {
22590Sstevel@tonic-gate 				icmp->icmp_reuseaddr = onoff;
22608348SEric.Yu@Sun.COM 				PASS_OPT_TO_IP(connp);
22618348SEric.Yu@Sun.COM 			}
22620Sstevel@tonic-gate 			break;
22630Sstevel@tonic-gate 
22640Sstevel@tonic-gate 		/*
22650Sstevel@tonic-gate 		 * The following three items are available here,
22660Sstevel@tonic-gate 		 * but are only meaningful to IP.
22670Sstevel@tonic-gate 		 */
22680Sstevel@tonic-gate 		case SO_DONTROUTE:
22698348SEric.Yu@Sun.COM 			if (!checkonly) {
22700Sstevel@tonic-gate 				icmp->icmp_dontroute = onoff;
22718348SEric.Yu@Sun.COM 				PASS_OPT_TO_IP(connp);
22728348SEric.Yu@Sun.COM 			}
22730Sstevel@tonic-gate 			break;
22740Sstevel@tonic-gate 		case SO_USELOOPBACK:
22758348SEric.Yu@Sun.COM 			if (!checkonly) {
22760Sstevel@tonic-gate 				icmp->icmp_useloopback = onoff;
22778348SEric.Yu@Sun.COM 				PASS_OPT_TO_IP(connp);
22788348SEric.Yu@Sun.COM 			}
22790Sstevel@tonic-gate 			break;
22800Sstevel@tonic-gate 		case SO_BROADCAST:
22818348SEric.Yu@Sun.COM 			if (!checkonly) {
22820Sstevel@tonic-gate 				icmp->icmp_broadcast = onoff;
22838348SEric.Yu@Sun.COM 				PASS_OPT_TO_IP(connp);
22848348SEric.Yu@Sun.COM 			}
22850Sstevel@tonic-gate 			break;
22860Sstevel@tonic-gate 
22870Sstevel@tonic-gate 		case SO_SNDBUF:
22883448Sdh155122 			if (*i1 > is->is_max_buf) {
22890Sstevel@tonic-gate 				*outlenp = 0;
22900Sstevel@tonic-gate 				return (ENOBUFS);
22910Sstevel@tonic-gate 			}
22920Sstevel@tonic-gate 			if (!checkonly) {
22938348SEric.Yu@Sun.COM 				if (!IPCL_IS_NONSTR(connp)) {
22948348SEric.Yu@Sun.COM 					connp->conn_wq->q_hiwat = *i1;
22958348SEric.Yu@Sun.COM 				}
22968348SEric.Yu@Sun.COM 				icmp->icmp_xmit_hiwat = *i1;
22970Sstevel@tonic-gate 			}
22980Sstevel@tonic-gate 			break;
22990Sstevel@tonic-gate 		case SO_RCVBUF:
23003448Sdh155122 			if (*i1 > is->is_max_buf) {
23010Sstevel@tonic-gate 				*outlenp = 0;
23020Sstevel@tonic-gate 				return (ENOBUFS);
23030Sstevel@tonic-gate 			}
23040Sstevel@tonic-gate 			if (!checkonly) {
23058348SEric.Yu@Sun.COM 				icmp->icmp_recv_hiwat = *i1;
23065240Snordmark 				rw_exit(&icmp->icmp_rwlock);
23078348SEric.Yu@Sun.COM 				(void) proto_set_rx_hiwat(connp->conn_rq, connp,
23088348SEric.Yu@Sun.COM 				    *i1);
23095240Snordmark 				rw_enter(&icmp->icmp_rwlock, RW_WRITER);
23100Sstevel@tonic-gate 			}
23110Sstevel@tonic-gate 			break;
23120Sstevel@tonic-gate 		case SO_DGRAM_ERRIND:
23130Sstevel@tonic-gate 			if (!checkonly)
23140Sstevel@tonic-gate 				icmp->icmp_dgram_errind = onoff;
23150Sstevel@tonic-gate 			break;
23162263Ssommerfe 		case SO_ALLZONES:
23172263Ssommerfe 			/*
23182263Ssommerfe 			 * "soft" error (negative)
23192263Ssommerfe 			 * option not handled at this level
23202263Ssommerfe 			 * Note: Do not modify *outlenp
23212263Ssommerfe 			 */
23222263Ssommerfe 			return (-EINVAL);
23231673Sgt145670 		case SO_TIMESTAMP:
23241673Sgt145670 			if (!checkonly) {
23251673Sgt145670 				icmp->icmp_timestamp = onoff;
23261673Sgt145670 			}
23271673Sgt145670 			break;
23281676Sjpk 		case SO_MAC_EXEMPT:
23296596Skp158701 			/*
23306596Skp158701 			 * "soft" error (negative)
23316596Skp158701 			 * option not handled at this level
23326596Skp158701 			 * Note: Do not modify *outlenp
23336596Skp158701 			 */
23346596Skp158701 			return (-EINVAL);
23358586Sshenjian 		case SO_RCVTIMEO:
23368586Sshenjian 		case SO_SNDTIMEO:
23378586Sshenjian 			/*
23388586Sshenjian 			 * Pass these two options in order for third-party
23398586Sshenjian 			 * protocol usage. Here just return directly.
23408586Sshenjian 			 */
23418586Sshenjian 			return (0);
23420Sstevel@tonic-gate 		/*
23430Sstevel@tonic-gate 		 * Following three not meaningful for icmp
23440Sstevel@tonic-gate 		 * Action is same as "default" so we keep them
23450Sstevel@tonic-gate 		 * in comments.
23460Sstevel@tonic-gate 		 * case SO_LINGER:
23470Sstevel@tonic-gate 		 * case SO_KEEPALIVE:
23480Sstevel@tonic-gate 		 * case SO_OOBINLINE:
23490Sstevel@tonic-gate 		 */
23500Sstevel@tonic-gate 		default:
23510Sstevel@tonic-gate 			*outlenp = 0;
23520Sstevel@tonic-gate 			return (EINVAL);
23530Sstevel@tonic-gate 		}
23540Sstevel@tonic-gate 		break;
23550Sstevel@tonic-gate 	case IPPROTO_IP:
23560Sstevel@tonic-gate 		/*
23570Sstevel@tonic-gate 		 * Only allow IPv4 option processing on IPv4 sockets.
23580Sstevel@tonic-gate 		 */
23590Sstevel@tonic-gate 		if (icmp->icmp_family != AF_INET) {
23600Sstevel@tonic-gate 			*outlenp = 0;
23610Sstevel@tonic-gate 			return (ENOPROTOOPT);
23620Sstevel@tonic-gate 		}
23630Sstevel@tonic-gate 		switch (name) {
23640Sstevel@tonic-gate 		case IP_OPTIONS:
23650Sstevel@tonic-gate 		case T_IP_OPTIONS:
23660Sstevel@tonic-gate 			/* Save options for use by IP. */
23671676Sjpk 			if ((inlen & 0x3) ||
23681676Sjpk 			    inlen + icmp->icmp_label_len > IP_MAX_OPT_LENGTH) {
23690Sstevel@tonic-gate 				*outlenp = 0;
23700Sstevel@tonic-gate 				return (EINVAL);
23710Sstevel@tonic-gate 			}
23720Sstevel@tonic-gate 			if (checkonly)
23730Sstevel@tonic-gate 				break;
23740Sstevel@tonic-gate 
23751676Sjpk 			if (!tsol_option_set(&icmp->icmp_ip_snd_options,
23761676Sjpk 			    &icmp->icmp_ip_snd_options_len,
23771676Sjpk 			    icmp->icmp_label_len, invalp, inlen)) {
23781676Sjpk 				*outlenp = 0;
23791676Sjpk 				return (ENOMEM);
23800Sstevel@tonic-gate 			}
23811676Sjpk 
23820Sstevel@tonic-gate 			icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
23830Sstevel@tonic-gate 			    icmp->icmp_ip_snd_options_len;
23845240Snordmark 			rw_exit(&icmp->icmp_rwlock);
23858348SEric.Yu@Sun.COM 			(void) proto_set_tx_wroff(connp->conn_rq == NULL ? NULL:
23868348SEric.Yu@Sun.COM 			    RD(connp->conn_rq), connp,
23878348SEric.Yu@Sun.COM 			    icmp->icmp_max_hdr_len + is->is_wroff_extra);
23885240Snordmark 			rw_enter(&icmp->icmp_rwlock, RW_WRITER);
23890Sstevel@tonic-gate 			break;
23900Sstevel@tonic-gate 		case IP_HDRINCL:
23910Sstevel@tonic-gate 			if (!checkonly)
23920Sstevel@tonic-gate 				icmp->icmp_hdrincl = onoff;
23930Sstevel@tonic-gate 			break;
23940Sstevel@tonic-gate 		case IP_TOS:
23950Sstevel@tonic-gate 		case T_IP_TOS:
23960Sstevel@tonic-gate 			if (!checkonly) {
23970Sstevel@tonic-gate 				icmp->icmp_type_of_service = (uint8_t)*i1;
23980Sstevel@tonic-gate 			}
23990Sstevel@tonic-gate 			break;
24000Sstevel@tonic-gate 		case IP_TTL:
24010Sstevel@tonic-gate 			if (!checkonly) {
24020Sstevel@tonic-gate 				icmp->icmp_ttl = (uint8_t)*i1;
24030Sstevel@tonic-gate 			}
24040Sstevel@tonic-gate 			break;
24050Sstevel@tonic-gate 		case IP_MULTICAST_IF:
24060Sstevel@tonic-gate 			/*
24070Sstevel@tonic-gate 			 * TODO should check OPTMGMT reply and undo this if
24080Sstevel@tonic-gate 			 * there is an error.
24090Sstevel@tonic-gate 			 */
24108348SEric.Yu@Sun.COM 			if (!checkonly) {
24110Sstevel@tonic-gate 				icmp->icmp_multicast_if_addr = *i1;
24128348SEric.Yu@Sun.COM 				PASS_OPT_TO_IP(connp);
24138348SEric.Yu@Sun.COM 			}
24140Sstevel@tonic-gate 			break;
24150Sstevel@tonic-gate 		case IP_MULTICAST_TTL:
24160Sstevel@tonic-gate 			if (!checkonly)
24170Sstevel@tonic-gate 				icmp->icmp_multicast_ttl = *invalp;
24180Sstevel@tonic-gate 			break;
24190Sstevel@tonic-gate 		case IP_MULTICAST_LOOP:
24200Sstevel@tonic-gate 			if (!checkonly) {
24215240Snordmark 				connp->conn_multicast_loop =
24220Sstevel@tonic-gate 				    (*invalp == 0) ? 0 : 1;
24238348SEric.Yu@Sun.COM 				PASS_OPT_TO_IP(connp);
24240Sstevel@tonic-gate 			}
24250Sstevel@tonic-gate 			break;
24260Sstevel@tonic-gate 		case IP_BOUND_IF:
24278348SEric.Yu@Sun.COM 			if (!checkonly) {
24280Sstevel@tonic-gate 				icmp->icmp_bound_if = *i1;
24298348SEric.Yu@Sun.COM 				PASS_OPT_TO_IP(connp);
24308348SEric.Yu@Sun.COM 			}
24310Sstevel@tonic-gate 			break;
24320Sstevel@tonic-gate 		case IP_UNSPEC_SRC:
24338348SEric.Yu@Sun.COM 			if (!checkonly) {
24340Sstevel@tonic-gate 				icmp->icmp_unspec_source = onoff;
24358348SEric.Yu@Sun.COM 				PASS_OPT_TO_IP(connp);
24368348SEric.Yu@Sun.COM 			}
24370Sstevel@tonic-gate 			break;
24385455Smeem 		case IP_BROADCAST_TTL:
24395455Smeem 			if (!checkonly)
24405455Smeem 				connp->conn_broadcast_ttl = *invalp;
24415455Smeem 			break;
24420Sstevel@tonic-gate 		case IP_RECVIF:
24438348SEric.Yu@Sun.COM 			if (!checkonly) {
24440Sstevel@tonic-gate 				icmp->icmp_recvif = onoff;
24458348SEric.Yu@Sun.COM 			}
24465240Snordmark 			/*
24475240Snordmark 			 * pass to ip
24485240Snordmark 			 */
24495240Snordmark 			return (-EINVAL);
24503318Srshoaib 		case IP_PKTINFO: {
24513318Srshoaib 			/*
24523318Srshoaib 			 * This also handles IP_RECVPKTINFO.
24533318Srshoaib 			 * IP_PKTINFO and IP_RECVPKTINFO have the same value.
24543318Srshoaib 			 * Differentiation is based on the size of the argument
24553318Srshoaib 			 * passed in.
24563318Srshoaib 			 */
24573318Srshoaib 			struct in_pktinfo *pktinfop;
24583318Srshoaib 			ip4_pkt_t *attr_pktinfop;
24593318Srshoaib 
24603318Srshoaib 			if (checkonly)
24613318Srshoaib 				break;
24623318Srshoaib 
24633318Srshoaib 			if (inlen == sizeof (int)) {
24643318Srshoaib 				/*
24653318Srshoaib 				 * This is IP_RECVPKTINFO option.
24673318Srshoaib 				 * Keep a local copy of whether this option is
24673318Srshoaib 				 * set or not and pass it down to IP for
24683318Srshoaib 				 * processing.
24693318Srshoaib 				 */
24703318Srshoaib 				icmp->icmp_ip_recvpktinfo = onoff;
24713318Srshoaib 				return (-EINVAL);
24723318Srshoaib 			}
24733318Srshoaib 
24743318Srshoaib 
24758348SEric.Yu@Sun.COM 			if (inlen != sizeof (struct in_pktinfo)) {
24763318Srshoaib 				return (EINVAL);
24778348SEric.Yu@Sun.COM 			}
24783318Srshoaib 
24793318Srshoaib 			if ((attr_pktinfop = (ip4_pkt_t *)thisdg_attrs)
24803318Srshoaib 			    == NULL) {
24813318Srshoaib 				/*
24823318Srshoaib 				 * sticky option is not supported
24833318Srshoaib 				 */
24843318Srshoaib 				return (EINVAL);
24853318Srshoaib 			}
24863318Srshoaib 
24873318Srshoaib 			pktinfop = (struct in_pktinfo *)invalp;
24883318Srshoaib 
24893318Srshoaib 			/*
24903318Srshoaib 			 * At least one of the values should be specified
24913318Srshoaib 			 */
24923318Srshoaib 			if (pktinfop->ipi_ifindex == 0 &&
24933318Srshoaib 			    pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) {
24943318Srshoaib 				return (EINVAL);
24953318Srshoaib 			}
24963318Srshoaib 
24973318Srshoaib 			attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr;
24983318Srshoaib 			attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex;
24993318Srshoaib 		}
25003318Srshoaib 			break;
25010Sstevel@tonic-gate 		case IP_ADD_MEMBERSHIP:
25020Sstevel@tonic-gate 		case IP_DROP_MEMBERSHIP:
25030Sstevel@tonic-gate 		case IP_BLOCK_SOURCE:
25040Sstevel@tonic-gate 		case IP_UNBLOCK_SOURCE:
25050Sstevel@tonic-gate 		case IP_ADD_SOURCE_MEMBERSHIP:
25060Sstevel@tonic-gate 		case IP_DROP_SOURCE_MEMBERSHIP:
25070Sstevel@tonic-gate 		case MCAST_JOIN_GROUP:
25080Sstevel@tonic-gate 		case MCAST_LEAVE_GROUP:
25090Sstevel@tonic-gate 		case MCAST_BLOCK_SOURCE:
25100Sstevel@tonic-gate 		case MCAST_UNBLOCK_SOURCE:
25110Sstevel@tonic-gate 		case MCAST_JOIN_SOURCE_GROUP:
25120Sstevel@tonic-gate 		case MCAST_LEAVE_SOURCE_GROUP:
25130Sstevel@tonic-gate 		case MRT_INIT:
25140Sstevel@tonic-gate 		case MRT_DONE:
25150Sstevel@tonic-gate 		case MRT_ADD_VIF:
25160Sstevel@tonic-gate 		case MRT_DEL_VIF:
25170Sstevel@tonic-gate 		case MRT_ADD_MFC:
25180Sstevel@tonic-gate 		case MRT_DEL_MFC:
25190Sstevel@tonic-gate 		case MRT_VERSION:
25200Sstevel@tonic-gate 		case MRT_ASSERT:
25210Sstevel@tonic-gate 		case IP_SEC_OPT:
25221663Spriyanka 		case IP_NEXTHOP:
25230Sstevel@tonic-gate 			/*
25240Sstevel@tonic-gate 			 * "soft" error (negative)
25250Sstevel@tonic-gate 			 * option not handled at this level
25260Sstevel@tonic-gate 			 * Note: Do not modify *outlenp
25270Sstevel@tonic-gate 			 */
25280Sstevel@tonic-gate 			return (-EINVAL);
25290Sstevel@tonic-gate 		default:
25300Sstevel@tonic-gate 			*outlenp = 0;
25310Sstevel@tonic-gate 			return (EINVAL);
25320Sstevel@tonic-gate 		}
25330Sstevel@tonic-gate 		break;
25340Sstevel@tonic-gate 	case IPPROTO_IPV6: {
25350Sstevel@tonic-gate 		ip6_pkt_t		*ipp;
25360Sstevel@tonic-gate 		boolean_t		sticky;
25370Sstevel@tonic-gate 
25380Sstevel@tonic-gate 		if (icmp->icmp_family != AF_INET6) {
25390Sstevel@tonic-gate 			*outlenp = 0;
25400Sstevel@tonic-gate 			return (ENOPROTOOPT);
25410Sstevel@tonic-gate 		}
25420Sstevel@tonic-gate 		/*
25430Sstevel@tonic-gate 		 * Deal with both sticky options and ancillary data
25440Sstevel@tonic-gate 		 */
25450Sstevel@tonic-gate 		if (thisdg_attrs == NULL) {
25460Sstevel@tonic-gate 			/* sticky options, or none */
25470Sstevel@tonic-gate 			ipp = &icmp->icmp_sticky_ipp;
25480Sstevel@tonic-gate 			sticky = B_TRUE;
25490Sstevel@tonic-gate 		} else {
25500Sstevel@tonic-gate 			/* ancillary data */
25510Sstevel@tonic-gate 			ipp = (ip6_pkt_t *)thisdg_attrs;
25520Sstevel@tonic-gate 			sticky = B_FALSE;
25530Sstevel@tonic-gate 		}
25540Sstevel@tonic-gate 
25550Sstevel@tonic-gate 		switch (name) {
25560Sstevel@tonic-gate 		case IPV6_MULTICAST_IF:
25578348SEric.Yu@Sun.COM 			if (!checkonly) {
25580Sstevel@tonic-gate 				icmp->icmp_multicast_if_index = *i1;
25598348SEric.Yu@Sun.COM 				PASS_OPT_TO_IP(connp);
25608348SEric.Yu@Sun.COM 			}
25610Sstevel@tonic-gate 			break;
25620Sstevel@tonic-gate 		case IPV6_UNICAST_HOPS:
25630Sstevel@tonic-gate 			/* -1 means use default */
25640Sstevel@tonic-gate 			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
25650Sstevel@tonic-gate 				*outlenp = 0;
25660Sstevel@tonic-gate 				return (EINVAL);
25670Sstevel@tonic-gate 			}
25680Sstevel@tonic-gate 			if (!checkonly) {
25690Sstevel@tonic-gate 				if (*i1 == -1) {
2570679Sseb 					icmp->icmp_ttl = ipp->ipp_unicast_hops =
25713448Sdh155122 					    is->is_ipv6_hoplimit;
2572679Sseb 					ipp->ipp_fields &= ~IPPF_UNICAST_HOPS;
25730Sstevel@tonic-gate 					/* Pass modified value to IP. */
25740Sstevel@tonic-gate 					*i1 = ipp->ipp_hoplimit;
25750Sstevel@tonic-gate 				} else {
2576679Sseb 					icmp->icmp_ttl = ipp->ipp_unicast_hops =
25770Sstevel@tonic-gate 					    (uint8_t)*i1;
2578679Sseb 					ipp->ipp_fields |= IPPF_UNICAST_HOPS;
25790Sstevel@tonic-gate 				}
25800Sstevel@tonic-gate 				/* Rebuild the header template */
25815240Snordmark 				error = icmp_build_hdrs(icmp);
25820Sstevel@tonic-gate 				if (error != 0) {
25830Sstevel@tonic-gate 					*outlenp = 0;
25840Sstevel@tonic-gate 					return (error);
25850Sstevel@tonic-gate 				}
25860Sstevel@tonic-gate 			}
25870Sstevel@tonic-gate 			break;
25880Sstevel@tonic-gate 		case IPV6_MULTICAST_HOPS:
25890Sstevel@tonic-gate 			/* -1 means use default */
25900Sstevel@tonic-gate 			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
25910Sstevel@tonic-gate 				*outlenp = 0;
25920Sstevel@tonic-gate 				return (EINVAL);
25930Sstevel@tonic-gate 			}
25940Sstevel@tonic-gate 			if (!checkonly) {
25950Sstevel@tonic-gate 				if (*i1 == -1) {
25960Sstevel@tonic-gate 					icmp->icmp_multicast_ttl =
2597679Sseb 					    ipp->ipp_multicast_hops =
25980Sstevel@tonic-gate 					    IP_DEFAULT_MULTICAST_TTL;
2599679Sseb 					ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS;
26000Sstevel@tonic-gate 					/* Pass modified value to IP. */
2601679Sseb 					*i1 = icmp->icmp_multicast_ttl;
26020Sstevel@tonic-gate 				} else {
26030Sstevel@tonic-gate 					icmp->icmp_multicast_ttl =
2604679Sseb 					    ipp->ipp_multicast_hops =
26050Sstevel@tonic-gate 					    (uint8_t)*i1;
2606679Sseb 					ipp->ipp_fields |= IPPF_MULTICAST_HOPS;
26070Sstevel@tonic-gate 				}
26080Sstevel@tonic-gate 			}
26090Sstevel@tonic-gate 			break;
26100Sstevel@tonic-gate 		case IPV6_MULTICAST_LOOP:
26110Sstevel@tonic-gate 			if (*i1 != 0 && *i1 != 1) {
26120Sstevel@tonic-gate 				*outlenp = 0;
26130Sstevel@tonic-gate 				return (EINVAL);
26140Sstevel@tonic-gate 			}
26158348SEric.Yu@Sun.COM 			if (!checkonly) {
26165240Snordmark 				connp->conn_multicast_loop = *i1;
26178348SEric.Yu@Sun.COM 				PASS_OPT_TO_IP(connp);
26188348SEric.Yu@Sun.COM 			}
26190Sstevel@tonic-gate 			break;
26200Sstevel@tonic-gate 		case IPV6_CHECKSUM:
26210Sstevel@tonic-gate 			/*
26220Sstevel@tonic-gate 			 * Integer offset into the user data of where the
26230Sstevel@tonic-gate 			 * checksum is located.
26240Sstevel@tonic-gate 			 * Offset of -1 disables option.
26250Sstevel@tonic-gate 			 * Does not apply to IPPROTO_ICMPV6.
26260Sstevel@tonic-gate 			 */
26270Sstevel@tonic-gate 			if (icmp->icmp_proto == IPPROTO_ICMPV6 || !sticky) {
26280Sstevel@tonic-gate 				*outlenp = 0;
26290Sstevel@tonic-gate 				return (EINVAL);
26300Sstevel@tonic-gate 			}
26310Sstevel@tonic-gate 			if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) {
26320Sstevel@tonic-gate 				/* Negative or not 16 bit aligned offset */
26330Sstevel@tonic-gate 				*outlenp = 0;
26340Sstevel@tonic-gate 				return (EINVAL);
26350Sstevel@tonic-gate 			}
26360Sstevel@tonic-gate 			if (checkonly)
26370Sstevel@tonic-gate 				break;
26380Sstevel@tonic-gate 
26390Sstevel@tonic-gate 			if (*i1 == -1) {
26400Sstevel@tonic-gate 				icmp->icmp_raw_checksum = 0;
26410Sstevel@tonic-gate 				ipp->ipp_fields &= ~IPPF_RAW_CKSUM;
26420Sstevel@tonic-gate 			} else {
26430Sstevel@tonic-gate 				icmp->icmp_raw_checksum = 1;
26440Sstevel@tonic-gate 				icmp->icmp_checksum_off = *i1;
26450Sstevel@tonic-gate 				ipp->ipp_fields |= IPPF_RAW_CKSUM;
26460Sstevel@tonic-gate 			}
26470Sstevel@tonic-gate 			/* Rebuild the header template */
26485240Snordmark 			error = icmp_build_hdrs(icmp);
26490Sstevel@tonic-gate 			if (error != 0) {
26500Sstevel@tonic-gate 				*outlenp = 0;
26510Sstevel@tonic-gate 				return (error);
26520Sstevel@tonic-gate 			}
26530Sstevel@tonic-gate 			break;
26540Sstevel@tonic-gate 		case IPV6_JOIN_GROUP:
26550Sstevel@tonic-gate 		case IPV6_LEAVE_GROUP:
26560Sstevel@tonic-gate 		case MCAST_JOIN_GROUP:
26570Sstevel@tonic-gate 		case MCAST_LEAVE_GROUP:
26580Sstevel@tonic-gate 		case MCAST_BLOCK_SOURCE:
26590Sstevel@tonic-gate 		case MCAST_UNBLOCK_SOURCE:
26600Sstevel@tonic-gate 		case MCAST_JOIN_SOURCE_GROUP:
26610Sstevel@tonic-gate 		case MCAST_LEAVE_SOURCE_GROUP:
26620Sstevel@tonic-gate 			/*
26630Sstevel@tonic-gate 			 * "soft" error (negative)
26640Sstevel@tonic-gate 			 * option not handled at this level
26650Sstevel@tonic-gate 			 * Note: Do not modify *outlenp
26660Sstevel@tonic-gate 			 */
26670Sstevel@tonic-gate 			return (-EINVAL);
26680Sstevel@tonic-gate 		case IPV6_BOUND_IF:
26698348SEric.Yu@Sun.COM 			if (!checkonly) {
26700Sstevel@tonic-gate 				icmp->icmp_bound_if = *i1;
26718348SEric.Yu@Sun.COM 				PASS_OPT_TO_IP(connp);
26728348SEric.Yu@Sun.COM 			}
26730Sstevel@tonic-gate 			break;
26740Sstevel@tonic-gate 		case IPV6_UNSPEC_SRC:
26758348SEric.Yu@Sun.COM 			if (!checkonly) {
26760Sstevel@tonic-gate 				icmp->icmp_unspec_source = onoff;
26778348SEric.Yu@Sun.COM 				PASS_OPT_TO_IP(connp);
26788348SEric.Yu@Sun.COM 			}
26790Sstevel@tonic-gate 			break;
26800Sstevel@tonic-gate 		case IPV6_RECVTCLASS:
26818348SEric.Yu@Sun.COM 			if (!checkonly) {
26820Sstevel@tonic-gate 				icmp->icmp_ipv6_recvtclass = onoff;
26838348SEric.Yu@Sun.COM 				PASS_OPT_TO_IP(connp);
26848348SEric.Yu@Sun.COM 			}
26850Sstevel@tonic-gate 			break;
26860Sstevel@tonic-gate 		/*
26870Sstevel@tonic-gate 		 * Set boolean switches for ancillary data delivery
26880Sstevel@tonic-gate 		 */
26890Sstevel@tonic-gate 		case IPV6_RECVPKTINFO:
26908348SEric.Yu@Sun.COM 			if (!checkonly) {
26913318Srshoaib 				icmp->icmp_ip_recvpktinfo = onoff;
26928348SEric.Yu@Sun.COM 				PASS_OPT_TO_IP(connp);
26938348SEric.Yu@Sun.COM 			}
26940Sstevel@tonic-gate 			break;
26950Sstevel@tonic-gate 		case IPV6_RECVPATHMTU:
26968348SEric.Yu@Sun.COM 			if (!checkonly) {
26970Sstevel@tonic-gate 				icmp->icmp_ipv6_recvpathmtu = onoff;
26988348SEric.Yu@Sun.COM 				PASS_OPT_TO_IP(connp);
26998348SEric.Yu@Sun.COM 			}
27000Sstevel@tonic-gate 			break;
27010Sstevel@tonic-gate 		case IPV6_RECVHOPLIMIT:
27028348SEric.Yu@Sun.COM 			if (!checkonly) {
27030Sstevel@tonic-gate 				icmp->icmp_ipv6_recvhoplimit = onoff;
27048348SEric.Yu@Sun.COM 				PASS_OPT_TO_IP(connp);
27058348SEric.Yu@Sun.COM 			}
27060Sstevel@tonic-gate 			break;
27070Sstevel@tonic-gate 		case IPV6_RECVHOPOPTS:
27088348SEric.Yu@Sun.COM 			if (!checkonly) {
27090Sstevel@tonic-gate 				icmp->icmp_ipv6_recvhopopts = onoff;
27108348SEric.Yu@Sun.COM 				PASS_OPT_TO_IP(connp);
27118348SEric.Yu@Sun.COM 			}
27120Sstevel@tonic-gate 			break;
27130Sstevel@tonic-gate 		case IPV6_RECVDSTOPTS:
27148348SEric.Yu@Sun.COM 			if (!checkonly) {
27150Sstevel@tonic-gate 				icmp->icmp_ipv6_recvdstopts = onoff;
27168348SEric.Yu@Sun.COM 				PASS_OPT_TO_IP(connp);
27178348SEric.Yu@Sun.COM 			}
27180Sstevel@tonic-gate 			break;
27190Sstevel@tonic-gate 		case _OLD_IPV6_RECVDSTOPTS:
27200Sstevel@tonic-gate 			if (!checkonly)
27210Sstevel@tonic-gate 				icmp->icmp_old_ipv6_recvdstopts = onoff;
27220Sstevel@tonic-gate 			break;
27230Sstevel@tonic-gate 		case IPV6_RECVRTHDRDSTOPTS:
27248348SEric.Yu@Sun.COM 			if (!checkonly) {
27250Sstevel@tonic-gate 				icmp->icmp_ipv6_recvrtdstopts = onoff;
27268348SEric.Yu@Sun.COM 				PASS_OPT_TO_IP(connp);
27278348SEric.Yu@Sun.COM 			}
27280Sstevel@tonic-gate 			break;
27290Sstevel@tonic-gate 		case IPV6_RECVRTHDR:
27308348SEric.Yu@Sun.COM 			if (!checkonly) {
27310Sstevel@tonic-gate 				icmp->icmp_ipv6_recvrthdr = onoff;
27328348SEric.Yu@Sun.COM 				PASS_OPT_TO_IP(connp);
27338348SEric.Yu@Sun.COM 			}
27340Sstevel@tonic-gate 			break;
27350Sstevel@tonic-gate 		/*
27360Sstevel@tonic-gate 		 * Set sticky options or ancillary data.
27370Sstevel@tonic-gate 		 * If sticky options, (re)build any extension headers
27380Sstevel@tonic-gate 		 * that might be needed as a result.
27390Sstevel@tonic-gate 		 */
27400Sstevel@tonic-gate 		case IPV6_PKTINFO:
27410Sstevel@tonic-gate 			/*
27420Sstevel@tonic-gate 			 * The source address and ifindex are verified
27430Sstevel@tonic-gate 			 * in ip_opt_set(). For ancillary data the
27440Sstevel@tonic-gate 			 * source address is checked in ip_wput_v6.
27450Sstevel@tonic-gate 			 */
27468348SEric.Yu@Sun.COM 			if (inlen != 0 && inlen !=
27478348SEric.Yu@Sun.COM 			    sizeof (struct in6_pktinfo)) {
27480Sstevel@tonic-gate 				return (EINVAL);
27498348SEric.Yu@Sun.COM 			}
27500Sstevel@tonic-gate 			if (checkonly)
27510Sstevel@tonic-gate 				break;
27520Sstevel@tonic-gate 
27530Sstevel@tonic-gate 			if (inlen == 0) {
27540Sstevel@tonic-gate 				ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR);
27550Sstevel@tonic-gate 				ipp->ipp_sticky_ignored |=
27560Sstevel@tonic-gate 				    (IPPF_IFINDEX|IPPF_ADDR);
27570Sstevel@tonic-gate 			} else {
27580Sstevel@tonic-gate 				struct in6_pktinfo *pkti;
27590Sstevel@tonic-gate 
27600Sstevel@tonic-gate 				pkti = (struct in6_pktinfo *)invalp;
27610Sstevel@tonic-gate 				ipp->ipp_ifindex = pkti->ipi6_ifindex;
27620Sstevel@tonic-gate 				ipp->ipp_addr = pkti->ipi6_addr;
27630Sstevel@tonic-gate 				if (ipp->ipp_ifindex != 0)
27640Sstevel@tonic-gate 					ipp->ipp_fields |= IPPF_IFINDEX;
27650Sstevel@tonic-gate 				else
27660Sstevel@tonic-gate 					ipp->ipp_fields &= ~IPPF_IFINDEX;
27670Sstevel@tonic-gate 				if (!IN6_IS_ADDR_UNSPECIFIED(
27680Sstevel@tonic-gate 				    &ipp->ipp_addr))
27690Sstevel@tonic-gate 					ipp->ipp_fields |= IPPF_ADDR;
27700Sstevel@tonic-gate 				else
27710Sstevel@tonic-gate 					ipp->ipp_fields &= ~IPPF_ADDR;
27720Sstevel@tonic-gate 			}
27730Sstevel@tonic-gate 			if (sticky) {
27745240Snordmark 				error = icmp_build_hdrs(icmp);
27750Sstevel@tonic-gate 				if (error != 0)
27760Sstevel@tonic-gate 					return (error);
27778348SEric.Yu@Sun.COM 				PASS_OPT_TO_IP(connp);
27780Sstevel@tonic-gate 			}
27790Sstevel@tonic-gate 			break;
27800Sstevel@tonic-gate 		case IPV6_HOPLIMIT:
2781679Sseb 			/* This option can only be used as ancillary data. */
2782679Sseb 			if (sticky)
2783679Sseb 				return (EINVAL);
27840Sstevel@tonic-gate 			if (inlen != 0 && inlen != sizeof (int))
27850Sstevel@tonic-gate 				return (EINVAL);
27860Sstevel@tonic-gate 			if (checkonly)
27870Sstevel@tonic-gate 				break;
27880Sstevel@tonic-gate 
27890Sstevel@tonic-gate 			if (inlen == 0) {
27900Sstevel@tonic-gate 				ipp->ipp_fields &= ~IPPF_HOPLIMIT;
27910Sstevel@tonic-gate 				ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT;
27920Sstevel@tonic-gate 			} else {
27930Sstevel@tonic-gate 				if (*i1 > 255 || *i1 < -1)
27940Sstevel@tonic-gate 					return (EINVAL);
27950Sstevel@tonic-gate 				if (*i1 == -1)
27963448Sdh155122 					ipp->ipp_hoplimit =
27973448Sdh155122 					    is->is_ipv6_hoplimit;
27980Sstevel@tonic-gate 				else
27990Sstevel@tonic-gate 					ipp->ipp_hoplimit = *i1;
28000Sstevel@tonic-gate 				ipp->ipp_fields |= IPPF_HOPLIMIT;
28010Sstevel@tonic-gate 			}
28020Sstevel@tonic-gate 			break;
28030Sstevel@tonic-gate 		case IPV6_TCLASS:
28040Sstevel@tonic-gate 			/*
28050Sstevel@tonic-gate 			 * IPV6_TCLASS accepts -1 as use kernel default
28060Sstevel@tonic-gate 			 * and [0, 255] as the actual traffic class.
28070Sstevel@tonic-gate 			 */
28088348SEric.Yu@Sun.COM 			if (inlen != 0 && inlen != sizeof (int)) {
28090Sstevel@tonic-gate 				return (EINVAL);
28108348SEric.Yu@Sun.COM 			}
28110Sstevel@tonic-gate 			if (checkonly)
28120Sstevel@tonic-gate 				break;
28130Sstevel@tonic-gate 
28140Sstevel@tonic-gate 			if (inlen == 0) {
28150Sstevel@tonic-gate 				ipp->ipp_fields &= ~IPPF_TCLASS;
28160Sstevel@tonic-gate 				ipp->ipp_sticky_ignored |= IPPF_TCLASS;
28170Sstevel@tonic-gate 			} else {
28180Sstevel@tonic-gate 				if (*i1 >= 256 || *i1 < -1)
28190Sstevel@tonic-gate 					return (EINVAL);
28200Sstevel@tonic-gate 				if (*i1 == -1) {
28210Sstevel@tonic-gate 					ipp->ipp_tclass =
28220Sstevel@tonic-gate 					    IPV6_FLOW_TCLASS(
28230Sstevel@tonic-gate 					    IPV6_DEFAULT_VERS_AND_FLOW);
28240Sstevel@tonic-gate 				} else {
28250Sstevel@tonic-gate 					ipp->ipp_tclass = *i1;
28260Sstevel@tonic-gate 				}
28270Sstevel@tonic-gate 				ipp->ipp_fields |= IPPF_TCLASS;
28280Sstevel@tonic-gate 			}
28290Sstevel@tonic-gate 			if (sticky) {
28305240Snordmark 				error = icmp_build_hdrs(icmp);
28310Sstevel@tonic-gate 				if (error != 0)
28320Sstevel@tonic-gate 					return (error);
28330Sstevel@tonic-gate 			}
28340Sstevel@tonic-gate 			break;
28350Sstevel@tonic-gate 		case IPV6_NEXTHOP:
28360Sstevel@tonic-gate 			/*
28370Sstevel@tonic-gate 			 * IP will verify that the nexthop is reachable
28380Sstevel@tonic-gate 			 * and fail for sticky options.
28390Sstevel@tonic-gate 			 */
28408348SEric.Yu@Sun.COM 			if (inlen != 0 && inlen != sizeof (sin6_t)) {
28410Sstevel@tonic-gate 				return (EINVAL);
28428348SEric.Yu@Sun.COM 			}
28430Sstevel@tonic-gate 			if (checkonly)
28440Sstevel@tonic-gate 				break;
28450Sstevel@tonic-gate 
28460Sstevel@tonic-gate 			if (inlen == 0) {
28470Sstevel@tonic-gate 				ipp->ipp_fields &= ~IPPF_NEXTHOP;
28480Sstevel@tonic-gate 				ipp->ipp_sticky_ignored |= IPPF_NEXTHOP;
28490Sstevel@tonic-gate 			} else {
28500Sstevel@tonic-gate 				sin6_t *sin6 = (sin6_t *)invalp;
28510Sstevel@tonic-gate 
28528348SEric.Yu@Sun.COM 				if (sin6->sin6_family != AF_INET6) {
28530Sstevel@tonic-gate 					return (EAFNOSUPPORT);
28548348SEric.Yu@Sun.COM 				}
28558348SEric.Yu@Sun.COM 				if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
28560Sstevel@tonic-gate 					return (EADDRNOTAVAIL);
28578348SEric.Yu@Sun.COM 				}
28580Sstevel@tonic-gate 				ipp->ipp_nexthop = sin6->sin6_addr;
28590Sstevel@tonic-gate 				if (!IN6_IS_ADDR_UNSPECIFIED(
28600Sstevel@tonic-gate 				    &ipp->ipp_nexthop))
28610Sstevel@tonic-gate 					ipp->ipp_fields |= IPPF_NEXTHOP;
28620Sstevel@tonic-gate 				else
28630Sstevel@tonic-gate 					ipp->ipp_fields &= ~IPPF_NEXTHOP;
28640Sstevel@tonic-gate 			}
28650Sstevel@tonic-gate 			if (sticky) {
28665240Snordmark 				error = icmp_build_hdrs(icmp);
28670Sstevel@tonic-gate 				if (error != 0)
28680Sstevel@tonic-gate 					return (error);
28698348SEric.Yu@Sun.COM 				PASS_OPT_TO_IP(connp);
28700Sstevel@tonic-gate 			}
28710Sstevel@tonic-gate 			break;
28720Sstevel@tonic-gate 		case IPV6_HOPOPTS: {
28730Sstevel@tonic-gate 			ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
28740Sstevel@tonic-gate 			/*
28750Sstevel@tonic-gate 			 * Sanity checks - minimum size, size a multiple of
28760Sstevel@tonic-gate 			 * eight bytes, and matching size passed in.
28770Sstevel@tonic-gate 			 */
28780Sstevel@tonic-gate 			if (inlen != 0 &&
28798348SEric.Yu@Sun.COM 			    inlen != (8 * (hopts->ip6h_len + 1))) {
28800Sstevel@tonic-gate 				return (EINVAL);
28818348SEric.Yu@Sun.COM 			}
28820Sstevel@tonic-gate 
28830Sstevel@tonic-gate 			if (checkonly)
28840Sstevel@tonic-gate 				break;
28851676Sjpk 			error = optcom_pkt_set(invalp, inlen, sticky,
28861676Sjpk 			    (uchar_t **)&ipp->ipp_hopopts,
28871676Sjpk 			    &ipp->ipp_hopoptslen,
28881676Sjpk 			    sticky ? icmp->icmp_label_len_v6 : 0);
28891676Sjpk 			if (error != 0)
28901676Sjpk 				return (error);
28911676Sjpk 			if (ipp->ipp_hopoptslen == 0) {
28920Sstevel@tonic-gate 				ipp->ipp_fields &= ~IPPF_HOPOPTS;
28930Sstevel@tonic-gate 				ipp->ipp_sticky_ignored |= IPPF_HOPOPTS;
28940Sstevel@tonic-gate 			} else {
28950Sstevel@tonic-gate 				ipp->ipp_fields |= IPPF_HOPOPTS;
28960Sstevel@tonic-gate 			}
28970Sstevel@tonic-gate 			if (sticky) {
28985240Snordmark 				error = icmp_build_hdrs(icmp);
28990Sstevel@tonic-gate 				if (error != 0)
29000Sstevel@tonic-gate 					return (error);
29010Sstevel@tonic-gate 			}
29020Sstevel@tonic-gate 			break;
29030Sstevel@tonic-gate 		}
29040Sstevel@tonic-gate 		case IPV6_RTHDRDSTOPTS: {
29050Sstevel@tonic-gate 			ip6_dest_t *dopts = (ip6_dest_t *)invalp;
29060Sstevel@tonic-gate 
29070Sstevel@tonic-gate 			/*
29080Sstevel@tonic-gate 			 * Sanity checks - minimum size, size a multiple of
29090Sstevel@tonic-gate 			 * eight bytes, and matching size passed in.
29100Sstevel@tonic-gate 			 */
29110Sstevel@tonic-gate 			if (inlen != 0 &&
29120Sstevel@tonic-gate 			    inlen != (8 * (dopts->ip6d_len + 1)))
29130Sstevel@tonic-gate 				return (EINVAL);
29140Sstevel@tonic-gate 
29150Sstevel@tonic-gate 			if (checkonly)
29160Sstevel@tonic-gate 				break;
29170Sstevel@tonic-gate 
29180Sstevel@tonic-gate 			if (inlen == 0) {
29190Sstevel@tonic-gate 				if (sticky &&
29200Sstevel@tonic-gate 				    (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) {
29210Sstevel@tonic-gate 					kmem_free(ipp->ipp_rtdstopts,
29220Sstevel@tonic-gate 					    ipp->ipp_rtdstoptslen);
29230Sstevel@tonic-gate 					ipp->ipp_rtdstopts = NULL;
29240Sstevel@tonic-gate 					ipp->ipp_rtdstoptslen = 0;
29250Sstevel@tonic-gate 				}
29260Sstevel@tonic-gate 				ipp->ipp_fields &= ~IPPF_RTDSTOPTS;
29270Sstevel@tonic-gate 				ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS;
29280Sstevel@tonic-gate 			} else {
29291676Sjpk 				error = optcom_pkt_set(invalp, inlen, sticky,
29300Sstevel@tonic-gate 				    (uchar_t **)&ipp->ipp_rtdstopts,
29311676Sjpk 				    &ipp->ipp_rtdstoptslen, 0);
29320Sstevel@tonic-gate 				if (error != 0)
29330Sstevel@tonic-gate 					return (error);
29340Sstevel@tonic-gate 				ipp->ipp_fields |= IPPF_RTDSTOPTS;
29350Sstevel@tonic-gate 			}
29360Sstevel@tonic-gate 			if (sticky) {
29375240Snordmark 				error = icmp_build_hdrs(icmp);
29380Sstevel@tonic-gate 				if (error != 0)
29390Sstevel@tonic-gate 					return (error);
29400Sstevel@tonic-gate 			}
29410Sstevel@tonic-gate 			break;
29420Sstevel@tonic-gate 		}
29430Sstevel@tonic-gate 		case IPV6_DSTOPTS: {
29440Sstevel@tonic-gate 			ip6_dest_t *dopts = (ip6_dest_t *)invalp;
29450Sstevel@tonic-gate 
29460Sstevel@tonic-gate 			/*
29470Sstevel@tonic-gate 			 * Sanity checks - minimum size, size a multiple of
29480Sstevel@tonic-gate 			 * eight bytes, and matching size passed in.
29490Sstevel@tonic-gate 			 */
29500Sstevel@tonic-gate 			if (inlen != 0 &&
29510Sstevel@tonic-gate 			    inlen != (8 * (dopts->ip6d_len + 1)))
29520Sstevel@tonic-gate 				return (EINVAL);
29530Sstevel@tonic-gate 
29540Sstevel@tonic-gate 			if (checkonly)
29550Sstevel@tonic-gate 				break;
29560Sstevel@tonic-gate 
29570Sstevel@tonic-gate 			if (inlen == 0) {
29580Sstevel@tonic-gate 				if (sticky &&
29590Sstevel@tonic-gate 				    (ipp->ipp_fields & IPPF_DSTOPTS) != 0) {
29600Sstevel@tonic-gate 					kmem_free(ipp->ipp_dstopts,
29610Sstevel@tonic-gate 					    ipp->ipp_dstoptslen);
29620Sstevel@tonic-gate 					ipp->ipp_dstopts = NULL;
29630Sstevel@tonic-gate 					ipp->ipp_dstoptslen = 0;
29640Sstevel@tonic-gate 				}
29650Sstevel@tonic-gate 				ipp->ipp_fields &= ~IPPF_DSTOPTS;
29660Sstevel@tonic-gate 				ipp->ipp_sticky_ignored |= IPPF_DSTOPTS;
29670Sstevel@tonic-gate 			} else {
29681676Sjpk 				error = optcom_pkt_set(invalp, inlen, sticky,
29690Sstevel@tonic-gate 				    (uchar_t **)&ipp->ipp_dstopts,
29701676Sjpk 				    &ipp->ipp_dstoptslen, 0);
29710Sstevel@tonic-gate 				if (error != 0)
29720Sstevel@tonic-gate 					return (error);
29730Sstevel@tonic-gate 				ipp->ipp_fields |= IPPF_DSTOPTS;
29740Sstevel@tonic-gate 			}
29750Sstevel@tonic-gate 			if (sticky) {
29765240Snordmark 				error = icmp_build_hdrs(icmp);
29770Sstevel@tonic-gate 				if (error != 0)
29780Sstevel@tonic-gate 					return (error);
29790Sstevel@tonic-gate 			}
29800Sstevel@tonic-gate 			break;
29810Sstevel@tonic-gate 		}
29820Sstevel@tonic-gate 		case IPV6_RTHDR: {
29830Sstevel@tonic-gate 			ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp;
29840Sstevel@tonic-gate 
29850Sstevel@tonic-gate 			/*
29860Sstevel@tonic-gate 			 * Sanity checks - minimum size, size a multiple of
29870Sstevel@tonic-gate 			 * eight bytes, and matching size passed in.
29880Sstevel@tonic-gate 			 */
29890Sstevel@tonic-gate 			if (inlen != 0 &&
29900Sstevel@tonic-gate 			    inlen != (8 * (rt->ip6r_len + 1)))
29910Sstevel@tonic-gate 				return (EINVAL);
29920Sstevel@tonic-gate 
29930Sstevel@tonic-gate 			if (checkonly)
29940Sstevel@tonic-gate 				break;
29950Sstevel@tonic-gate 
29960Sstevel@tonic-gate 			if (inlen == 0) {
29970Sstevel@tonic-gate 				if (sticky &&
29980Sstevel@tonic-gate 				    (ipp->ipp_fields & IPPF_RTHDR) != 0) {
29990Sstevel@tonic-gate 					kmem_free(ipp->ipp_rthdr,
30000Sstevel@tonic-gate 					    ipp->ipp_rthdrlen);
30010Sstevel@tonic-gate 					ipp->ipp_rthdr = NULL;
30020Sstevel@tonic-gate 					ipp->ipp_rthdrlen = 0;
30030Sstevel@tonic-gate 				}
30040Sstevel@tonic-gate 				ipp->ipp_fields &= ~IPPF_RTHDR;
30050Sstevel@tonic-gate 				ipp->ipp_sticky_ignored |= IPPF_RTHDR;
30060Sstevel@tonic-gate 			} else {
30071676Sjpk 				error = optcom_pkt_set(invalp, inlen, sticky,
30080Sstevel@tonic-gate 				    (uchar_t **)&ipp->ipp_rthdr,
30091676Sjpk 				    &ipp->ipp_rthdrlen, 0);
30100Sstevel@tonic-gate 				if (error != 0)
30110Sstevel@tonic-gate 					return (error);
30120Sstevel@tonic-gate 				ipp->ipp_fields |= IPPF_RTHDR;
30130Sstevel@tonic-gate 			}
30140Sstevel@tonic-gate 			if (sticky) {
30155240Snordmark 				error = icmp_build_hdrs(icmp);
30160Sstevel@tonic-gate 				if (error != 0)
30170Sstevel@tonic-gate 					return (error);
30180Sstevel@tonic-gate 			}
30190Sstevel@tonic-gate 			break;
30200Sstevel@tonic-gate 		}
30210Sstevel@tonic-gate 
30220Sstevel@tonic-gate 		case IPV6_DONTFRAG:
30230Sstevel@tonic-gate 			if (checkonly)
30240Sstevel@tonic-gate 				break;
30250Sstevel@tonic-gate 
30260Sstevel@tonic-gate 			if (onoff) {
30270Sstevel@tonic-gate 				ipp->ipp_fields |= IPPF_DONTFRAG;
30280Sstevel@tonic-gate 			} else {
30290Sstevel@tonic-gate 				ipp->ipp_fields &= ~IPPF_DONTFRAG;
30300Sstevel@tonic-gate 			}
30310Sstevel@tonic-gate 			break;
30320Sstevel@tonic-gate 
30330Sstevel@tonic-gate 		case IPV6_USE_MIN_MTU:
30340Sstevel@tonic-gate 			if (inlen != sizeof (int))
30350Sstevel@tonic-gate 				return (EINVAL);
30360Sstevel@tonic-gate 
30370Sstevel@tonic-gate 			if (*i1 < -1 || *i1 > 1)
30380Sstevel@tonic-gate 				return (EINVAL);
30390Sstevel@tonic-gate 
30400Sstevel@tonic-gate 			if (checkonly)
30410Sstevel@tonic-gate 				break;
30420Sstevel@tonic-gate 
30430Sstevel@tonic-gate 			ipp->ipp_fields |= IPPF_USE_MIN_MTU;
30440Sstevel@tonic-gate 			ipp->ipp_use_min_mtu = *i1;
30450Sstevel@tonic-gate 			break;
30460Sstevel@tonic-gate 
30470Sstevel@tonic-gate 		/*
30480Sstevel@tonic-gate 		 * This option can't be set.  It is only returned via
30490Sstevel@tonic-gate 		 * getsockopt() or ancillary data.
30500Sstevel@tonic-gate 		 */
30510Sstevel@tonic-gate 		case IPV6_PATHMTU:
30520Sstevel@tonic-gate 			return (EINVAL);
30530Sstevel@tonic-gate 
30540Sstevel@tonic-gate 		case IPV6_SEC_OPT:
30550Sstevel@tonic-gate 		case IPV6_SRC_PREFERENCES:
30560Sstevel@tonic-gate 		case IPV6_V6ONLY:
30570Sstevel@tonic-gate 			/* Handled at IP level */
30580Sstevel@tonic-gate 			return (-EINVAL);
30590Sstevel@tonic-gate 		default:
30600Sstevel@tonic-gate 			*outlenp = 0;
30610Sstevel@tonic-gate 			return (EINVAL);
30620Sstevel@tonic-gate 		}
30630Sstevel@tonic-gate 		break;
30640Sstevel@tonic-gate 	}		/* end IPPROTO_IPV6 */
30650Sstevel@tonic-gate 
30660Sstevel@tonic-gate 	case IPPROTO_ICMPV6:
30670Sstevel@tonic-gate 		/*
30680Sstevel@tonic-gate 		 * Only allow IPv6 option processing on IPv6 sockets.
30690Sstevel@tonic-gate 		 */
30700Sstevel@tonic-gate 		if (icmp->icmp_family != AF_INET6) {
30710Sstevel@tonic-gate 			*outlenp = 0;
30720Sstevel@tonic-gate 			return (ENOPROTOOPT);
30730Sstevel@tonic-gate 		}
30740Sstevel@tonic-gate 		if (icmp->icmp_proto != IPPROTO_ICMPV6) {
30750Sstevel@tonic-gate 			*outlenp = 0;
30760Sstevel@tonic-gate 			return (ENOPROTOOPT);
30770Sstevel@tonic-gate 		}
30780Sstevel@tonic-gate 		switch (name) {
30790Sstevel@tonic-gate 		case ICMP6_FILTER:
30800Sstevel@tonic-gate 			if (!checkonly) {
30810Sstevel@tonic-gate 				if ((inlen != 0) &&
30820Sstevel@tonic-gate 				    (inlen != sizeof (icmp6_filter_t)))
30830Sstevel@tonic-gate 					return (EINVAL);
30840Sstevel@tonic-gate 
30850Sstevel@tonic-gate 				if (inlen == 0) {
30860Sstevel@tonic-gate 					if (icmp->icmp_filter != NULL) {
30870Sstevel@tonic-gate 						kmem_free(icmp->icmp_filter,
30880Sstevel@tonic-gate 						    sizeof (icmp6_filter_t));
30890Sstevel@tonic-gate 						icmp->icmp_filter = NULL;
30900Sstevel@tonic-gate 					}
30910Sstevel@tonic-gate 				} else {
30920Sstevel@tonic-gate 					if (icmp->icmp_filter == NULL) {
30930Sstevel@tonic-gate 						icmp->icmp_filter = kmem_alloc(
30940Sstevel@tonic-gate 						    sizeof (icmp6_filter_t),
30950Sstevel@tonic-gate 						    KM_NOSLEEP);
30960Sstevel@tonic-gate 						if (icmp->icmp_filter == NULL) {
30970Sstevel@tonic-gate 							*outlenp = 0;
30980Sstevel@tonic-gate 							return (ENOBUFS);
30990Sstevel@tonic-gate 						}
31000Sstevel@tonic-gate 					}
31010Sstevel@tonic-gate 					(void) bcopy(invalp, icmp->icmp_filter,
31020Sstevel@tonic-gate 					    inlen);
31030Sstevel@tonic-gate 				}
31040Sstevel@tonic-gate 			}
31050Sstevel@tonic-gate 			break;
31060Sstevel@tonic-gate 
31070Sstevel@tonic-gate 		default:
31080Sstevel@tonic-gate 			*outlenp = 0;
31090Sstevel@tonic-gate 			return (EINVAL);
31100Sstevel@tonic-gate 		}
31110Sstevel@tonic-gate 		break;
31120Sstevel@tonic-gate 	default:
31130Sstevel@tonic-gate 		*outlenp = 0;
31140Sstevel@tonic-gate 		return (EINVAL);
31150Sstevel@tonic-gate 	}
31160Sstevel@tonic-gate 	/*
31170Sstevel@tonic-gate 	 * Common case of OK return with outval same as inval.
31180Sstevel@tonic-gate 	 */
31190Sstevel@tonic-gate 	if (invalp != outvalp) {
31200Sstevel@tonic-gate 		/* don't trust bcopy for identical src/dst */
31210Sstevel@tonic-gate 		(void) bcopy(invalp, outvalp, inlen);
31220Sstevel@tonic-gate 	}
31230Sstevel@tonic-gate 	*outlenp = inlen;
31240Sstevel@tonic-gate 	return (0);
31250Sstevel@tonic-gate }
31268348SEric.Yu@Sun.COM 
31275240Snordmark /* This routine sets socket options. */
31285240Snordmark /* ARGSUSED */
31295240Snordmark int
31308348SEric.Yu@Sun.COM icmp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
31318348SEric.Yu@Sun.COM     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
31328348SEric.Yu@Sun.COM     void *thisdg_attrs, cred_t *cr)
31338348SEric.Yu@Sun.COM {
31348348SEric.Yu@Sun.COM 	boolean_t checkonly;
31358348SEric.Yu@Sun.COM 	int	error;
31368348SEric.Yu@Sun.COM 
31378348SEric.Yu@Sun.COM 	error = 0;
31388348SEric.Yu@Sun.COM 	switch (optset_context) {
31398348SEric.Yu@Sun.COM 	case SETFN_OPTCOM_CHECKONLY:
31408348SEric.Yu@Sun.COM 		checkonly = B_TRUE;
31418348SEric.Yu@Sun.COM 		/*
31428348SEric.Yu@Sun.COM 		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
31438348SEric.Yu@Sun.COM 		 * inlen != 0 implies value supplied and
31448348SEric.Yu@Sun.COM 		 * 	we have to "pretend" to set it.
31458348SEric.Yu@Sun.COM 		 * inlen == 0 implies that there is no
31468348SEric.Yu@Sun.COM 		 * 	value part in T_CHECK request and just validation
31478348SEric.Yu@Sun.COM 		 * done elsewhere should be enough, we just return here.
31488348SEric.Yu@Sun.COM 		 */
31498348SEric.Yu@Sun.COM 		if (inlen == 0) {
31508348SEric.Yu@Sun.COM 			*outlenp = 0;
31518348SEric.Yu@Sun.COM 			error = 0;
31528348SEric.Yu@Sun.COM 			goto done;
31538348SEric.Yu@Sun.COM 		}
31548348SEric.Yu@Sun.COM 		break;
31558348SEric.Yu@Sun.COM 	case SETFN_OPTCOM_NEGOTIATE:
31568348SEric.Yu@Sun.COM 		checkonly = B_FALSE;
31578348SEric.Yu@Sun.COM 		break;
31588348SEric.Yu@Sun.COM 	case SETFN_UD_NEGOTIATE:
31598348SEric.Yu@Sun.COM 	case SETFN_CONN_NEGOTIATE:
31608348SEric.Yu@Sun.COM 		checkonly = B_FALSE;
31618348SEric.Yu@Sun.COM 		/*
31628348SEric.Yu@Sun.COM 		 * Negotiating local and "association-related" options
31638348SEric.Yu@Sun.COM 		 * through T_UNITDATA_REQ.
31648348SEric.Yu@Sun.COM 		 *
31658348SEric.Yu@Sun.COM 		 * Following routine can filter out ones we do not
31668348SEric.Yu@Sun.COM 		 * want to be "set" this way.
31678348SEric.Yu@Sun.COM 		 */
31688348SEric.Yu@Sun.COM 		if (!icmp_opt_allow_udr_set(level, name)) {
31698348SEric.Yu@Sun.COM 			*outlenp = 0;
31708348SEric.Yu@Sun.COM 			error = EINVAL;
31718348SEric.Yu@Sun.COM 			goto done;
31728348SEric.Yu@Sun.COM 		}
31738348SEric.Yu@Sun.COM 		break;
31748348SEric.Yu@Sun.COM 	default:
31758348SEric.Yu@Sun.COM 		/*
31768348SEric.Yu@Sun.COM 		 * We should never get here
31778348SEric.Yu@Sun.COM 		 */
31788348SEric.Yu@Sun.COM 		*outlenp = 0;
31798348SEric.Yu@Sun.COM 		error = EINVAL;
31808348SEric.Yu@Sun.COM 		goto done;
31818348SEric.Yu@Sun.COM 	}
31828348SEric.Yu@Sun.COM 
31838348SEric.Yu@Sun.COM 	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
31848348SEric.Yu@Sun.COM 	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
31858348SEric.Yu@Sun.COM 	error = icmp_do_opt_set(connp, level, name, inlen, invalp, outlenp,
31868348SEric.Yu@Sun.COM 	    outvalp, cr, thisdg_attrs, checkonly);
31878348SEric.Yu@Sun.COM 
31888348SEric.Yu@Sun.COM done:
31898348SEric.Yu@Sun.COM 	return (error);
31908348SEric.Yu@Sun.COM }
31918348SEric.Yu@Sun.COM 
31928348SEric.Yu@Sun.COM /* This routine sets socket options. */
31938348SEric.Yu@Sun.COM /* ARGSUSED */
31948348SEric.Yu@Sun.COM int
31958348SEric.Yu@Sun.COM icmp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
31965240Snordmark     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
31975240Snordmark     void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
31985240Snordmark {
31998348SEric.Yu@Sun.COM 	conn_t	*connp =  Q_TO_CONN(q);
32005240Snordmark 	icmp_t	*icmp;
32018348SEric.Yu@Sun.COM 	int error;
32028348SEric.Yu@Sun.COM 
32038348SEric.Yu@Sun.COM 	icmp = connp->conn_icmp;
32045240Snordmark 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
32058348SEric.Yu@Sun.COM 	error = icmp_opt_set(connp, optset_context, level, name, inlen, invalp,
32068348SEric.Yu@Sun.COM 	    outlenp, outvalp, thisdg_attrs, cr);
32075240Snordmark 	rw_exit(&icmp->icmp_rwlock);
32088348SEric.Yu@Sun.COM 	return (error);
32095240Snordmark }
32100Sstevel@tonic-gate 
32110Sstevel@tonic-gate /*
32120Sstevel@tonic-gate  * Update icmp_sticky_hdrs based on icmp_sticky_ipp, icmp_v6src, icmp_ttl,
32130Sstevel@tonic-gate  * icmp_proto, icmp_raw_checksum and icmp_no_tp_cksum.
32140Sstevel@tonic-gate  * The headers include ip6i_t (if needed), ip6_t, and any sticky extension
32150Sstevel@tonic-gate  * headers.
32160Sstevel@tonic-gate  * Returns failure if can't allocate memory.
32170Sstevel@tonic-gate  */
32180Sstevel@tonic-gate static int
32195240Snordmark icmp_build_hdrs(icmp_t *icmp)
32200Sstevel@tonic-gate {
32213448Sdh155122 	icmp_stack_t *is = icmp->icmp_is;
32220Sstevel@tonic-gate 	uchar_t	*hdrs;
32230Sstevel@tonic-gate 	uint_t	hdrs_len;
32240Sstevel@tonic-gate 	ip6_t	*ip6h;
32250Sstevel@tonic-gate 	ip6i_t	*ip6i;
32260Sstevel@tonic-gate 	ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp;
32270Sstevel@tonic-gate 
32285240Snordmark 	ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock));
32290Sstevel@tonic-gate 	hdrs_len = ip_total_hdrs_len_v6(ipp);
32300Sstevel@tonic-gate 	ASSERT(hdrs_len != 0);
32310Sstevel@tonic-gate 	if (hdrs_len != icmp->icmp_sticky_hdrs_len) {
32320Sstevel@tonic-gate 		/* Need to reallocate */
32330Sstevel@tonic-gate 		if (hdrs_len != 0) {
32340Sstevel@tonic-gate 			hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
32350Sstevel@tonic-gate 			if (hdrs == NULL)
32360Sstevel@tonic-gate 				return (ENOMEM);
32370Sstevel@tonic-gate 		} else {
32380Sstevel@tonic-gate 			hdrs = NULL;
32390Sstevel@tonic-gate 		}
32400Sstevel@tonic-gate 		if (icmp->icmp_sticky_hdrs_len != 0) {
32410Sstevel@tonic-gate 			kmem_free(icmp->icmp_sticky_hdrs,
32420Sstevel@tonic-gate 			    icmp->icmp_sticky_hdrs_len);
32430Sstevel@tonic-gate 		}
32440Sstevel@tonic-gate 		icmp->icmp_sticky_hdrs = hdrs;
32450Sstevel@tonic-gate 		icmp->icmp_sticky_hdrs_len = hdrs_len;
32460Sstevel@tonic-gate 	}
32470Sstevel@tonic-gate 	ip_build_hdrs_v6(icmp->icmp_sticky_hdrs,
32480Sstevel@tonic-gate 	    icmp->icmp_sticky_hdrs_len, ipp, icmp->icmp_proto);
32490Sstevel@tonic-gate 
32500Sstevel@tonic-gate 	/* Set header fields not in ipp */
32510Sstevel@tonic-gate 	if (ipp->ipp_fields & IPPF_HAS_IP6I) {
32520Sstevel@tonic-gate 		ip6i = (ip6i_t *)icmp->icmp_sticky_hdrs;
32530Sstevel@tonic-gate 		ip6h = (ip6_t *)&ip6i[1];
32540Sstevel@tonic-gate 
32550Sstevel@tonic-gate 		if (ipp->ipp_fields & IPPF_RAW_CKSUM) {
32560Sstevel@tonic-gate 			ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM;
32570Sstevel@tonic-gate 			ip6i->ip6i_checksum_off = icmp->icmp_checksum_off;
32580Sstevel@tonic-gate 		}
32590Sstevel@tonic-gate 		if (ipp->ipp_fields & IPPF_NO_CKSUM) {
32600Sstevel@tonic-gate 			ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM;
32610Sstevel@tonic-gate 		}
32620Sstevel@tonic-gate 	} else {
32630Sstevel@tonic-gate 		ip6h = (ip6_t *)icmp->icmp_sticky_hdrs;
32640Sstevel@tonic-gate 	}
32650Sstevel@tonic-gate 
32660Sstevel@tonic-gate 	if (!(ipp->ipp_fields & IPPF_ADDR))
32670Sstevel@tonic-gate 		ip6h->ip6_src = icmp->icmp_v6src;
32680Sstevel@tonic-gate 
32690Sstevel@tonic-gate 	/* Try to get everything in a single mblk */
32700Sstevel@tonic-gate 	if (hdrs_len > icmp->icmp_max_hdr_len) {
32710Sstevel@tonic-gate 		icmp->icmp_max_hdr_len = hdrs_len;
32725240Snordmark 		rw_exit(&icmp->icmp_rwlock);
32738348SEric.Yu@Sun.COM 		(void) proto_set_tx_wroff(icmp->icmp_connp->conn_rq,
32748348SEric.Yu@Sun.COM 		    icmp->icmp_connp,
32755240Snordmark 		    icmp->icmp_max_hdr_len + is->is_wroff_extra);
32765240Snordmark 		rw_enter(&icmp->icmp_rwlock, RW_WRITER);
32770Sstevel@tonic-gate 	}
32780Sstevel@tonic-gate 	return (0);
32790Sstevel@tonic-gate }
32800Sstevel@tonic-gate 
32810Sstevel@tonic-gate /*
32820Sstevel@tonic-gate  * This routine retrieves the value of an ND variable in a icmpparam_t
32830Sstevel@tonic-gate  * structure.  It is called through nd_getset when a user reads the
32840Sstevel@tonic-gate  * variable.
32850Sstevel@tonic-gate  */
32860Sstevel@tonic-gate /* ARGSUSED */
32870Sstevel@tonic-gate static int
32880Sstevel@tonic-gate icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
32890Sstevel@tonic-gate {
32900Sstevel@tonic-gate 	icmpparam_t	*icmppa = (icmpparam_t *)cp;
32910Sstevel@tonic-gate 
32920Sstevel@tonic-gate 	(void) mi_mpprintf(mp, "%d", icmppa->icmp_param_value);
32930Sstevel@tonic-gate 	return (0);
32940Sstevel@tonic-gate }
32950Sstevel@tonic-gate 
32960Sstevel@tonic-gate /*
32970Sstevel@tonic-gate  * Walk through the param array specified registering each element with the
32980Sstevel@tonic-gate  * named dispatch (ND) handler.
32990Sstevel@tonic-gate  */
33000Sstevel@tonic-gate static boolean_t
33013448Sdh155122 icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt)
33020Sstevel@tonic-gate {
33030Sstevel@tonic-gate 	for (; cnt-- > 0; icmppa++) {
33040Sstevel@tonic-gate 		if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) {
33053448Sdh155122 			if (!nd_load(ndp, icmppa->icmp_param_name,
33060Sstevel@tonic-gate 			    icmp_param_get, icmp_param_set,
33070Sstevel@tonic-gate 			    (caddr_t)icmppa)) {
33083448Sdh155122 				nd_free(ndp);
33090Sstevel@tonic-gate 				return (B_FALSE);
33100Sstevel@tonic-gate 			}
33110Sstevel@tonic-gate 		}
33120Sstevel@tonic-gate 	}
33133448Sdh155122 	if (!nd_load(ndp, "icmp_status", icmp_status_report, NULL,
33140Sstevel@tonic-gate 	    NULL)) {
33153448Sdh155122 		nd_free(ndp);
33160Sstevel@tonic-gate 		return (B_FALSE);
33170Sstevel@tonic-gate 	}
33180Sstevel@tonic-gate 	return (B_TRUE);
33190Sstevel@tonic-gate }
33200Sstevel@tonic-gate 
33210Sstevel@tonic-gate /* This routine sets an ND variable in a icmpparam_t structure. */
33220Sstevel@tonic-gate /* ARGSUSED */
33230Sstevel@tonic-gate static int
33240Sstevel@tonic-gate icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
33250Sstevel@tonic-gate {
33260Sstevel@tonic-gate 	long		new_value;
33270Sstevel@tonic-gate 	icmpparam_t	*icmppa = (icmpparam_t *)cp;
33280Sstevel@tonic-gate 
33290Sstevel@tonic-gate 	/*
33300Sstevel@tonic-gate 	 * Fail the request if the new value does not lie within the
33310Sstevel@tonic-gate 	 * required bounds.
33320Sstevel@tonic-gate 	 */
33330Sstevel@tonic-gate 	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
33340Sstevel@tonic-gate 	    new_value < icmppa->icmp_param_min ||
33350Sstevel@tonic-gate 	    new_value > icmppa->icmp_param_max) {
33360Sstevel@tonic-gate 		return (EINVAL);
33370Sstevel@tonic-gate 	}
33380Sstevel@tonic-gate 	/* Set the new value */
33390Sstevel@tonic-gate 	icmppa->icmp_param_value = new_value;
33400Sstevel@tonic-gate 	return (0);
33410Sstevel@tonic-gate }
33428348SEric.Yu@Sun.COM static void
33438348SEric.Yu@Sun.COM icmp_queue_fallback(icmp_t *icmp, mblk_t *mp)
33448348SEric.Yu@Sun.COM {
33458348SEric.Yu@Sun.COM 	ASSERT(MUTEX_HELD(&icmp->icmp_recv_lock));
33468348SEric.Yu@Sun.COM 	if (IPCL_IS_NONSTR(icmp->icmp_connp)) {
33478348SEric.Yu@Sun.COM 		/*
33488348SEric.Yu@Sun.COM 		 * fallback has started but messages have not been moved yet
33498348SEric.Yu@Sun.COM 		 */
33508348SEric.Yu@Sun.COM 		if (icmp->icmp_fallback_queue_head == NULL) {
33518348SEric.Yu@Sun.COM 			ASSERT(icmp->icmp_fallback_queue_tail == NULL);
33528348SEric.Yu@Sun.COM 			icmp->icmp_fallback_queue_head = mp;
33538348SEric.Yu@Sun.COM 			icmp->icmp_fallback_queue_tail = mp;
33548348SEric.Yu@Sun.COM 		} else {
33558348SEric.Yu@Sun.COM 			ASSERT(icmp->icmp_fallback_queue_tail != NULL);
33568348SEric.Yu@Sun.COM 			icmp->icmp_fallback_queue_tail->b_next = mp;
33578348SEric.Yu@Sun.COM 			icmp->icmp_fallback_queue_tail = mp;
33588348SEric.Yu@Sun.COM 		}
33598348SEric.Yu@Sun.COM 		mutex_exit(&icmp->icmp_recv_lock);
33608348SEric.Yu@Sun.COM 	} else {
33618348SEric.Yu@Sun.COM 		/*
33628348SEric.Yu@Sun.COM 		 * no more fallbacks possible, ok to drop lock.
33638348SEric.Yu@Sun.COM 		 */
33648348SEric.Yu@Sun.COM 		mutex_exit(&icmp->icmp_recv_lock);
33658348SEric.Yu@Sun.COM 		putnext(icmp->icmp_connp->conn_rq, mp);
33668348SEric.Yu@Sun.COM 	}
33678348SEric.Yu@Sun.COM }
33688348SEric.Yu@Sun.COM 
33695240Snordmark /*ARGSUSED2*/
33700Sstevel@tonic-gate static void
33715240Snordmark icmp_input(void *arg1, mblk_t *mp, void *arg2)
33720Sstevel@tonic-gate {
33735240Snordmark 	conn_t *connp = (conn_t *)arg1;
33740Sstevel@tonic-gate 	struct T_unitdata_ind	*tudi;
33750Sstevel@tonic-gate 	uchar_t			*rptr;
33765240Snordmark 	icmp_t			*icmp;
33775240Snordmark 	icmp_stack_t		*is;
33780Sstevel@tonic-gate 	sin_t			*sin;
33790Sstevel@tonic-gate 	sin6_t			*sin6;
33800Sstevel@tonic-gate 	ip6_t			*ip6h;
33810Sstevel@tonic-gate 	ip6i_t			*ip6i;
33820Sstevel@tonic-gate 	mblk_t			*mp1;
33830Sstevel@tonic-gate 	int			hdr_len;
33840Sstevel@tonic-gate 	ipha_t			*ipha;
33850Sstevel@tonic-gate 	int			udi_size;	/* Size of T_unitdata_ind */
33860Sstevel@tonic-gate 	uint_t			ipvers;
33870Sstevel@tonic-gate 	ip6_pkt_t		ipp;
33880Sstevel@tonic-gate 	uint8_t			nexthdr;
33893318Srshoaib 	ip_pktinfo_t		*pinfo = NULL;
33900Sstevel@tonic-gate 	mblk_t			*options_mp = NULL;
33910Sstevel@tonic-gate 	uint_t			icmp_opt = 0;
33920Sstevel@tonic-gate 	boolean_t		icmp_ipv6_recvhoplimit = B_FALSE;
33931676Sjpk 	uint_t			hopstrip;
33948348SEric.Yu@Sun.COM 	int			error;
33950Sstevel@tonic-gate 
33965240Snordmark 	ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
33975240Snordmark 
33985240Snordmark 	icmp = connp->conn_icmp;
33995240Snordmark 	is = icmp->icmp_is;
34005240Snordmark 	rptr = mp->b_rptr;
34015240Snordmark 	ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL);
34025240Snordmark 	ASSERT(OK_32PTR(rptr));
34035240Snordmark 
34045240Snordmark 	/*
34055240Snordmark 	 * IP should have prepended the options data in an M_CTL.
34065240Snordmark 	 * Check the M_CTL "type" to make sure we are not here because of
34075240Snordmark 	 * a valid ICMP message.
34085240Snordmark 	 */
34095240Snordmark 	if (DB_TYPE(mp) == M_CTL) {
34100Sstevel@tonic-gate 		/*
34115240Snordmark 		 * FIXME: does IP still do this?
34120Sstevel@tonic-gate 		 * IP sends up the IPSEC_IN message for handling IPSEC
34130Sstevel@tonic-gate 		 * policy at the TCP level. We don't need it here.
34140Sstevel@tonic-gate 		 */
34150Sstevel@tonic-gate 		if (*(uint32_t *)(mp->b_rptr) == IPSEC_IN) {
34160Sstevel@tonic-gate 			mp1 = mp->b_cont;
34170Sstevel@tonic-gate 			freeb(mp);
34180Sstevel@tonic-gate 			mp = mp1;
34195240Snordmark 			rptr = mp->b_rptr;
34205240Snordmark 		} else if (MBLKL(mp) == sizeof (ip_pktinfo_t) &&
34215240Snordmark 		    ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type ==
34225240Snordmark 		    IN_PKTINFO) {
34235240Snordmark 			/*
34245240Snordmark 			 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information
34255240Snordmark 			 * has been prepended to the packet by IP. We need to
34265240Snordmark 			 * extract the mblk and adjust the rptr
34275240Snordmark 			 */
34283318Srshoaib 			pinfo = (ip_pktinfo_t *)mp->b_rptr;
34295240Snordmark 			options_mp = mp;
34305240Snordmark 			mp = mp->b_cont;
34315240Snordmark 			rptr = mp->b_rptr;
34325240Snordmark 		} else {
34335240Snordmark 			/*
34345240Snordmark 			 * ICMP messages.
34355240Snordmark 			 */
34368348SEric.Yu@Sun.COM 			icmp_icmp_error(connp, mp);
34370Sstevel@tonic-gate 			return;
34380Sstevel@tonic-gate 		}
34390Sstevel@tonic-gate 	}
34400Sstevel@tonic-gate 
34410Sstevel@tonic-gate 	/*
34420Sstevel@tonic-gate 	 * Discard message if it is misaligned or smaller than the IP header.
34430Sstevel@tonic-gate 	 */
34440Sstevel@tonic-gate 	if (!OK_32PTR(rptr) || (mp->b_wptr - rptr) < sizeof (ipha_t)) {
34450Sstevel@tonic-gate 		freemsg(mp);
34460Sstevel@tonic-gate 		if (options_mp != NULL)
34470Sstevel@tonic-gate 			freeb(options_mp);
34485240Snordmark 		BUMP_MIB(&is->is_rawip_mib, rawipInErrors);
34490Sstevel@tonic-gate 		return;
34500Sstevel@tonic-gate 	}
34510Sstevel@tonic-gate 	ipvers = IPH_HDR_VERSION((ipha_t *)rptr);
34520Sstevel@tonic-gate 
34530Sstevel@tonic-gate 	/* Handle M_DATA messages containing IP packets */
34540Sstevel@tonic-gate 	if (ipvers == IPV4_VERSION) {
34550Sstevel@tonic-gate 		/*
34560Sstevel@tonic-gate 		 * Special case where IP attaches
34570Sstevel@tonic-gate 		 * the IRE needs to be handled so that we don't send up
34580Sstevel@tonic-gate 		 * IRE to the user land.
34590Sstevel@tonic-gate 		 */
34600Sstevel@tonic-gate 		ipha = (ipha_t *)rptr;
34610Sstevel@tonic-gate 		hdr_len = IPH_HDR_LENGTH(ipha);
34620Sstevel@tonic-gate 
34630Sstevel@tonic-gate 		if (ipha->ipha_protocol == IPPROTO_TCP) {
34640Sstevel@tonic-gate 			tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len];
34650Sstevel@tonic-gate 
34660Sstevel@tonic-gate 			if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) ==
34670Sstevel@tonic-gate 			    TH_SYN) && mp->b_cont != NULL) {
34680Sstevel@tonic-gate 				mp1 = mp->b_cont;
34690Sstevel@tonic-gate 				if (mp1->b_datap->db_type == IRE_DB_TYPE) {
34700Sstevel@tonic-gate 					freeb(mp1);
34710Sstevel@tonic-gate 					mp->b_cont = NULL;
34720Sstevel@tonic-gate 				}
34730Sstevel@tonic-gate 			}
34740Sstevel@tonic-gate 		}
34753448Sdh155122 		if (is->is_bsd_compat) {
34760Sstevel@tonic-gate 			ushort_t len;
34770Sstevel@tonic-gate 			len = ntohs(ipha->ipha_length);
34780Sstevel@tonic-gate 
34790Sstevel@tonic-gate 			if (mp->b_datap->db_ref > 1) {
34800Sstevel@tonic-gate 				/*
34810Sstevel@tonic-gate 				 * Allocate a new IP header so that we can
34820Sstevel@tonic-gate 				 * modify ipha_length.
34830Sstevel@tonic-gate 				 */
34840Sstevel@tonic-gate 				mblk_t	*mp1;
34850Sstevel@tonic-gate 
34860Sstevel@tonic-gate 				mp1 = allocb(hdr_len, BPRI_MED);
34870Sstevel@tonic-gate 				if (!mp1) {
34880Sstevel@tonic-gate 					freemsg(mp);
34890Sstevel@tonic-gate 					if (options_mp != NULL)
34900Sstevel@tonic-gate 						freeb(options_mp);
34915240Snordmark 					BUMP_MIB(&is->is_rawip_mib,
34923448Sdh155122 					    rawipInErrors);
34930Sstevel@tonic-gate 					return;
34940Sstevel@tonic-gate 				}
34950Sstevel@tonic-gate 				bcopy(rptr, mp1->b_rptr, hdr_len);
34960Sstevel@tonic-gate 				mp->b_rptr = rptr + hdr_len;
34970Sstevel@tonic-gate 				rptr = mp1->b_rptr;
34980Sstevel@tonic-gate 				ipha = (ipha_t *)rptr;
34990Sstevel@tonic-gate 				mp1->b_cont = mp;
35000Sstevel@tonic-gate 				mp1->b_wptr = rptr + hdr_len;
35010Sstevel@tonic-gate 				mp = mp1;
35020Sstevel@tonic-gate 			}
35030Sstevel@tonic-gate 			len -= hdr_len;
35040Sstevel@tonic-gate 			ipha->ipha_length = htons(len);
35050Sstevel@tonic-gate 		}
35060Sstevel@tonic-gate 	}
35070Sstevel@tonic-gate 
35080Sstevel@tonic-gate 	/*
35090Sstevel@tonic-gate 	 * This is the inbound data path.  Packets are passed upstream as
35100Sstevel@tonic-gate 	 * T_UNITDATA_IND messages with full IP headers still attached.
35110Sstevel@tonic-gate 	 */
35120Sstevel@tonic-gate 	if (icmp->icmp_family == AF_INET) {
35130Sstevel@tonic-gate 		ASSERT(ipvers == IPV4_VERSION);
35140Sstevel@tonic-gate 		udi_size =  sizeof (struct T_unitdata_ind) + sizeof (sin_t);
35155267Snordmark 		if (icmp->icmp_recvif && (pinfo != NULL) &&
35163318Srshoaib 		    (pinfo->ip_pkt_flags & IPF_RECVIF)) {
35170Sstevel@tonic-gate 			udi_size += sizeof (struct T_opthdr) +
35180Sstevel@tonic-gate 			    sizeof (uint_t);
35190Sstevel@tonic-gate 		}
35203318Srshoaib 
35215267Snordmark 		if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) &&
35223318Srshoaib 		    (pinfo->ip_pkt_flags & IPF_RECVADDR)) {
35233318Srshoaib 			udi_size += sizeof (struct T_opthdr) +
35243318Srshoaib 			    sizeof (struct in_pktinfo);
35253318Srshoaib 		}
35263318Srshoaib 
35271673Sgt145670 		/*
35281673Sgt145670 		 * If SO_TIMESTAMP is set allocate the appropriate sized
35291673Sgt145670 		 * buffer. Since gethrestime() expects a pointer aligned
35301673Sgt145670 		 * argument, we allocate space necessary for extra
35311673Sgt145670 		 * alignment (even though it might not be used).
35321673Sgt145670 		 */
35331673Sgt145670 		if (icmp->icmp_timestamp) {
35341673Sgt145670 			udi_size += sizeof (struct T_opthdr) +
35351673Sgt145670 			    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
35361673Sgt145670 		}
35370Sstevel@tonic-gate 		mp1 = allocb(udi_size, BPRI_MED);
35380Sstevel@tonic-gate 		if (mp1 == NULL) {
35390Sstevel@tonic-gate 			freemsg(mp);
35400Sstevel@tonic-gate 			if (options_mp != NULL)
35410Sstevel@tonic-gate 				freeb(options_mp);
35425240Snordmark 			BUMP_MIB(&is->is_rawip_mib, rawipInErrors);
35430Sstevel@tonic-gate 			return;
35440Sstevel@tonic-gate 		}
35450Sstevel@tonic-gate 		mp1->b_cont = mp;
35460Sstevel@tonic-gate 		mp = mp1;
35470Sstevel@tonic-gate 		tudi = (struct T_unitdata_ind *)mp->b_rptr;
35480Sstevel@tonic-gate 		mp->b_datap->db_type = M_PROTO;
35490Sstevel@tonic-gate 		mp->b_wptr = (uchar_t *)tudi + udi_size;
35500Sstevel@tonic-gate 		tudi->PRIM_type = T_UNITDATA_IND;
35510Sstevel@tonic-gate 		tudi->SRC_length = sizeof (sin_t);
35520Sstevel@tonic-gate 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
35530Sstevel@tonic-gate 		sin = (sin_t *)&tudi[1];
35540Sstevel@tonic-gate 		*sin = sin_null;
35550Sstevel@tonic-gate 		sin->sin_family = AF_INET;
35560Sstevel@tonic-gate 		sin->sin_addr.s_addr = ipha->ipha_src;
35570Sstevel@tonic-gate 		tudi->OPT_offset =  sizeof (struct T_unitdata_ind) +
35580Sstevel@tonic-gate 		    sizeof (sin_t);
35590Sstevel@tonic-gate 		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
35600Sstevel@tonic-gate 		tudi->OPT_length = udi_size;
35610Sstevel@tonic-gate 
35620Sstevel@tonic-gate 		/*
35630Sstevel@tonic-gate 		 * Add options if IP_RECVIF is set
35640Sstevel@tonic-gate 		 */
35650Sstevel@tonic-gate 		if (udi_size != 0) {
35660Sstevel@tonic-gate 			char *dstopt;
35670Sstevel@tonic-gate 
35680Sstevel@tonic-gate 			dstopt = (char *)&sin[1];
35695267Snordmark 			if (icmp->icmp_recvif && (pinfo != NULL) &&
35703318Srshoaib 			    (pinfo->ip_pkt_flags & IPF_RECVIF)) {
35710Sstevel@tonic-gate 
35720Sstevel@tonic-gate 				struct T_opthdr *toh;
35730Sstevel@tonic-gate 				uint_t		*dstptr;
35740Sstevel@tonic-gate 
35750Sstevel@tonic-gate 				toh = (struct T_opthdr *)dstopt;
35760Sstevel@tonic-gate 				toh->level = IPPROTO_IP;
35770Sstevel@tonic-gate 				toh->name = IP_RECVIF;
35780Sstevel@tonic-gate 				toh->len = sizeof (struct T_opthdr) +
35795240Snordmark 				    sizeof (uint_t);
35800Sstevel@tonic-gate 				toh->status = 0;
35810Sstevel@tonic-gate 				dstopt += sizeof (struct T_opthdr);
35820Sstevel@tonic-gate 				dstptr = (uint_t *)dstopt;
35833318Srshoaib 				*dstptr = pinfo->ip_pkt_ifindex;
35840Sstevel@tonic-gate 				dstopt += sizeof (uint_t);
35850Sstevel@tonic-gate 				udi_size -= toh->len;
35860Sstevel@tonic-gate 			}
35871673Sgt145670 			if (icmp->icmp_timestamp) {
35881673Sgt145670 				struct	T_opthdr *toh;
35891673Sgt145670 
35901673Sgt145670 				toh = (struct T_opthdr *)dstopt;
35911673Sgt145670 				toh->level = SOL_SOCKET;
35921673Sgt145670 				toh->name = SCM_TIMESTAMP;
35931673Sgt145670 				toh->len = sizeof (struct T_opthdr) +
35941673Sgt145670 				    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
35951673Sgt145670 				toh->status = 0;
35961673Sgt145670 				dstopt += sizeof (struct T_opthdr);
35971673Sgt145670 				/* Align for gethrestime() */
35981673Sgt145670 				dstopt = (char *)P2ROUNDUP((intptr_t)dstopt,
35991673Sgt145670 				    sizeof (intptr_t));
36001673Sgt145670 				gethrestime((timestruc_t *)dstopt);
36013318Srshoaib 				dstopt = (char *)toh + toh->len;
36023318Srshoaib 				udi_size -= toh->len;
36033318Srshoaib 			}
36045267Snordmark 			if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) &&
36053318Srshoaib 			    (pinfo->ip_pkt_flags & IPF_RECVADDR)) {
36063318Srshoaib 				struct	T_opthdr *toh;
36073318Srshoaib 				struct	in_pktinfo *pktinfop;
36083318Srshoaib 
36093318Srshoaib 				toh = (struct T_opthdr *)dstopt;
36103318Srshoaib 				toh->level = IPPROTO_IP;
36113318Srshoaib 				toh->name = IP_PKTINFO;
36123318Srshoaib 				toh->len = sizeof (struct T_opthdr) +
36133318Srshoaib 				    sizeof (in_pktinfo_t);
36143318Srshoaib 				toh->status = 0;
36153318Srshoaib 				dstopt += sizeof (struct T_opthdr);
36163318Srshoaib 				pktinfop = (struct in_pktinfo *)dstopt;
36173318Srshoaib 				pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex;
36183318Srshoaib 				pktinfop->ipi_spec_dst =
36193318Srshoaib 				    pinfo->ip_pkt_match_addr;
36203318Srshoaib 
36213318Srshoaib 				pktinfop->ipi_addr.s_addr = ipha->ipha_dst;
36223318Srshoaib 
36233318Srshoaib 				dstopt += sizeof (struct in_pktinfo);
36241673Sgt145670 				udi_size -= toh->len;
36251673Sgt145670 			}
36260Sstevel@tonic-gate 
36270Sstevel@tonic-gate 			/* Consumed all of allocated space */
36280Sstevel@tonic-gate 			ASSERT(udi_size == 0);
36290Sstevel@tonic-gate 		}
36300Sstevel@tonic-gate 
36315267Snordmark 		if (options_mp != NULL)
36325267Snordmark 			freeb(options_mp);
36335267Snordmark 
36345240Snordmark 		BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams);
36358348SEric.Yu@Sun.COM 		goto deliver;
36360Sstevel@tonic-gate 	}
36370Sstevel@tonic-gate 
36380Sstevel@tonic-gate 	/*
36390Sstevel@tonic-gate 	 * We don't need options_mp in the IPv6 path.
36400Sstevel@tonic-gate 	 */
36410Sstevel@tonic-gate 	if (options_mp != NULL) {
36420Sstevel@tonic-gate 		freeb(options_mp);
36430Sstevel@tonic-gate 		options_mp = NULL;
36440Sstevel@tonic-gate 	}
36450Sstevel@tonic-gate 
36460Sstevel@tonic-gate 	/*
36470Sstevel@tonic-gate 	 * Discard message if it is smaller than the IPv6 header
36480Sstevel@tonic-gate 	 * or if the header is malformed.
36490Sstevel@tonic-gate 	 */
36500Sstevel@tonic-gate 	if ((mp->b_wptr - rptr) < sizeof (ip6_t) ||
36510Sstevel@tonic-gate 	    IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION ||
36520Sstevel@tonic-gate 	    icmp->icmp_family != AF_INET6) {
36530Sstevel@tonic-gate 		freemsg(mp);
36545240Snordmark 		BUMP_MIB(&is->is_rawip_mib, rawipInErrors);
36550Sstevel@tonic-gate 		return;
36560Sstevel@tonic-gate 	}
36570Sstevel@tonic-gate 
36580Sstevel@tonic-gate 	/* Initialize */
36590Sstevel@tonic-gate 	ipp.ipp_fields = 0;
36601676Sjpk 	hopstrip = 0;
36610Sstevel@tonic-gate 
36620Sstevel@tonic-gate 	ip6h = (ip6_t *)rptr;
36630Sstevel@tonic-gate 	/*
36640Sstevel@tonic-gate 	 * Call on ip_find_hdr_v6 which gets the total hdr len
36650Sstevel@tonic-gate 	 * as well as individual lengths of ext hdrs (and ptrs to
36660Sstevel@tonic-gate 	 * them).
36670Sstevel@tonic-gate 	 */
36680Sstevel@tonic-gate 	if (ip6h->ip6_nxt != icmp->icmp_proto) {
36690Sstevel@tonic-gate 		/* Look for ifindex information */
36700Sstevel@tonic-gate 		if (ip6h->ip6_nxt == IPPROTO_RAW) {
36710Sstevel@tonic-gate 			ip6i = (ip6i_t *)ip6h;
36720Sstevel@tonic-gate 			if (ip6i->ip6i_flags & IP6I_IFINDEX) {
36730Sstevel@tonic-gate 				ASSERT(ip6i->ip6i_ifindex != 0);
36740Sstevel@tonic-gate 				ipp.ipp_fields |= IPPF_IFINDEX;
36750Sstevel@tonic-gate 				ipp.ipp_ifindex = ip6i->ip6i_ifindex;
36760Sstevel@tonic-gate 			}
36770Sstevel@tonic-gate 			rptr = (uchar_t *)&ip6i[1];
36780Sstevel@tonic-gate 			mp->b_rptr = rptr;
36790Sstevel@tonic-gate 			if (rptr == mp->b_wptr) {
36800Sstevel@tonic-gate 				mp1 = mp->b_cont;
36810Sstevel@tonic-gate 				freeb(mp);
36820Sstevel@tonic-gate 				mp = mp1;
36830Sstevel@tonic-gate 				rptr = mp->b_rptr;
36840Sstevel@tonic-gate 			}
36850Sstevel@tonic-gate 			ASSERT(mp->b_wptr - rptr >= IPV6_HDR_LEN);
36860Sstevel@tonic-gate 			ip6h = (ip6_t *)rptr;
36870Sstevel@tonic-gate 		}
36880Sstevel@tonic-gate 		hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdr);
36891676Sjpk 
36901676Sjpk 		/*
36911676Sjpk 		 * We need to lie a bit to the user because users inside
36921676Sjpk 		 * labeled compartments should not see their own labels.  We
36931676Sjpk 		 * assume that in all other respects IP has checked the label,
36941676Sjpk 		 * and that the label is always first among the options.  (If
36951676Sjpk 		 * it's not first, then this code won't see it, and the option
36961676Sjpk 		 * will be passed along to the user.)
36971676Sjpk 		 *
36981676Sjpk 		 * If we had multilevel ICMP sockets, then the following code
36991676Sjpk 		 * should be skipped for them to allow the user to see the
37001676Sjpk 		 * label.
37011676Sjpk 		 *
37021676Sjpk 		 * Alignment restrictions in the definition of IP options
37031676Sjpk 		 * (namely, the requirement that the 4-octet DOI goes on a
37041676Sjpk 		 * 4-octet boundary) mean that we know exactly where the option
37051676Sjpk 		 * should start, but we're lenient for other hosts.
37061676Sjpk 		 *
37071676Sjpk 		 * Note that there are no multilevel ICMP or raw IP sockets
37081676Sjpk 		 * yet, thus nobody ever sees the IP6OPT_LS option.
37091676Sjpk 		 */
37101676Sjpk 		if ((ipp.ipp_fields & IPPF_HOPOPTS) &&
37111676Sjpk 		    ipp.ipp_hopoptslen > 5 && is_system_labeled()) {
37121676Sjpk 			const uchar_t *ucp =
37131676Sjpk 			    (const uchar_t *)ipp.ipp_hopopts + 2;
37141676Sjpk 			int remlen = ipp.ipp_hopoptslen - 2;
37151676Sjpk 
37161676Sjpk 			while (remlen > 0) {
37171676Sjpk 				if (*ucp == IP6OPT_PAD1) {
37181676Sjpk 					remlen--;
37191676Sjpk 					ucp++;
37201676Sjpk 				} else if (*ucp == IP6OPT_PADN) {
37211676Sjpk 					remlen -= ucp[1] + 2;
37221676Sjpk 					ucp += ucp[1] + 2;
37231676Sjpk 				} else if (*ucp == ip6opt_ls) {
37241676Sjpk 					hopstrip = (ucp -
37251676Sjpk 					    (const uchar_t *)ipp.ipp_hopopts) +
37261676Sjpk 					    ucp[1] + 2;
37271676Sjpk 					hopstrip = (hopstrip + 7) & ~7;
37281676Sjpk 					break;
37291676Sjpk 				} else {
37301676Sjpk 					/* label option must be first */
37311676Sjpk 					break;
37321676Sjpk 				}
37331676Sjpk 			}
37341676Sjpk 		}
37350Sstevel@tonic-gate 	} else {
37360Sstevel@tonic-gate 		hdr_len = IPV6_HDR_LEN;
37370Sstevel@tonic-gate 		ip6i = NULL;
37380Sstevel@tonic-gate 		nexthdr = ip6h->ip6_nxt;
37390Sstevel@tonic-gate 	}
37400Sstevel@tonic-gate 	/*
37410Sstevel@tonic-gate 	 * One special case where IP attaches the IRE needs to
37420Sstevel@tonic-gate 	 * be handled so that we don't send up IRE to the user land.
37430Sstevel@tonic-gate 	 */
37440Sstevel@tonic-gate 	if (nexthdr == IPPROTO_TCP) {
37450Sstevel@tonic-gate 		tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len];
37460Sstevel@tonic-gate 
37470Sstevel@tonic-gate 		if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == TH_SYN) &&
37480Sstevel@tonic-gate 		    mp->b_cont != NULL) {
37490Sstevel@tonic-gate 			mp1 = mp->b_cont;
37500Sstevel@tonic-gate 			if (mp1->b_datap->db_type == IRE_DB_TYPE) {
37510Sstevel@tonic-gate 				freeb(mp1);
37520Sstevel@tonic-gate 				mp->b_cont = NULL;
37530Sstevel@tonic-gate 			}
37540Sstevel@tonic-gate 		}
37550Sstevel@tonic-gate 	}
37560Sstevel@tonic-gate 	/*
37570Sstevel@tonic-gate 	 * Check a filter for ICMPv6 types if needed.
37580Sstevel@tonic-gate 	 * Verify raw checksums if needed.
37590Sstevel@tonic-gate 	 */
37600Sstevel@tonic-gate 	if (icmp->icmp_filter != NULL || icmp->icmp_raw_checksum) {
37610Sstevel@tonic-gate 		if (icmp->icmp_filter != NULL) {
37620Sstevel@tonic-gate 			int type;
37630Sstevel@tonic-gate 
37640Sstevel@tonic-gate 			/* Assumes that IP has done the pullupmsg */
37650Sstevel@tonic-gate 			type = mp->b_rptr[hdr_len];
37660Sstevel@tonic-gate 
37670Sstevel@tonic-gate 			ASSERT(mp->b_rptr + hdr_len <= mp->b_wptr);
37680Sstevel@tonic-gate 			if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) {
37690Sstevel@tonic-gate 				freemsg(mp);
37700Sstevel@tonic-gate 				return;
37710Sstevel@tonic-gate 			}
37720Sstevel@tonic-gate 		} else {
37730Sstevel@tonic-gate 			/* Checksum */
37740Sstevel@tonic-gate 			uint16_t	*up;
37750Sstevel@tonic-gate 			uint32_t	sum;
37760Sstevel@tonic-gate 			int		remlen;
37770Sstevel@tonic-gate 
37780Sstevel@tonic-gate 			up = (uint16_t *)&ip6h->ip6_src;
37790Sstevel@tonic-gate 
37800Sstevel@tonic-gate 			remlen = msgdsize(mp) - hdr_len;
37810Sstevel@tonic-gate 			sum = htons(icmp->icmp_proto + remlen)
37820Sstevel@tonic-gate 			    + up[0] + up[1] + up[2] + up[3]
37830Sstevel@tonic-gate 			    + up[4] + up[5] + up[6] + up[7]
37840Sstevel@tonic-gate 			    + up[8] + up[9] + up[10] + up[11]
37850Sstevel@tonic-gate 			    + up[12] + up[13] + up[14] + up[15];
37860Sstevel@tonic-gate 			sum = (sum & 0xffff) + (sum >> 16);
37870Sstevel@tonic-gate 			sum = IP_CSUM(mp, hdr_len, sum);
37880Sstevel@tonic-gate 			if (sum != 0) {
37890Sstevel@tonic-gate 				/* IPv6 RAW checksum failed */
37900Sstevel@tonic-gate 				ip0dbg(("icmp_rput: RAW checksum "
37910Sstevel@tonic-gate 				    "failed %x\n", sum));
37920Sstevel@tonic-gate 				freemsg(mp);
37935240Snordmark 				BUMP_MIB(&is->is_rawip_mib,
37943448Sdh155122 				    rawipInCksumErrs);
37950Sstevel@tonic-gate 				return;
37960Sstevel@tonic-gate 			}
37970Sstevel@tonic-gate 		}
37980Sstevel@tonic-gate 	}
37990Sstevel@tonic-gate 	/* Skip all the IPv6 headers per API */
38000Sstevel@tonic-gate 	mp->b_rptr += hdr_len;
38010Sstevel@tonic-gate 
38020Sstevel@tonic-gate 	udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
38030Sstevel@tonic-gate 
38040Sstevel@tonic-gate 	/*
38050Sstevel@tonic-gate 	 * We use local variables icmp_opt and icmp_ipv6_recvhoplimit to
38060Sstevel@tonic-gate 	 * maintain state information, instead of relying on icmp_t
38070Sstevel@tonic-gate 	 * structure, since there aren't any locks protecting these members
38080Sstevel@tonic-gate 	 * and there is a window where there might be a race between a
38090Sstevel@tonic-gate 	 * thread setting options on the write side and a thread reading
38100Sstevel@tonic-gate 	 * these options on the read side.
38110Sstevel@tonic-gate 	 */
38120Sstevel@tonic-gate 	if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS|
38130Sstevel@tonic-gate 	    IPPF_RTHDR|IPPF_IFINDEX)) {
38140Sstevel@tonic-gate 		if (icmp->icmp_ipv6_recvhopopts &&
38151676Sjpk 		    (ipp.ipp_fields & IPPF_HOPOPTS) &&
38161676Sjpk 		    ipp.ipp_hopoptslen > hopstrip) {
38170Sstevel@tonic-gate 			udi_size += sizeof (struct T_opthdr) +
38181676Sjpk 			    ipp.ipp_hopoptslen - hopstrip;
38190Sstevel@tonic-gate 			icmp_opt |= IPPF_HOPOPTS;
38200Sstevel@tonic-gate 		}
38210Sstevel@tonic-gate 		if ((icmp->icmp_ipv6_recvdstopts ||
38225240Snordmark 		    icmp->icmp_old_ipv6_recvdstopts) &&
38230Sstevel@tonic-gate 		    (ipp.ipp_fields & IPPF_DSTOPTS)) {
38240Sstevel@tonic-gate 			udi_size += sizeof (struct T_opthdr) +
38250Sstevel@tonic-gate 			    ipp.ipp_dstoptslen;
38260Sstevel@tonic-gate 			icmp_opt |= IPPF_DSTOPTS;
38270Sstevel@tonic-gate 		}
38280Sstevel@tonic-gate 		if (((icmp->icmp_ipv6_recvdstopts &&
38290Sstevel@tonic-gate 		    icmp->icmp_ipv6_recvrthdr &&
38300Sstevel@tonic-gate 		    (ipp.ipp_fields & IPPF_RTHDR)) ||
38310Sstevel@tonic-gate 		    icmp->icmp_ipv6_recvrtdstopts) &&
38320Sstevel@tonic-gate 		    (ipp.ipp_fields & IPPF_RTDSTOPTS)) {
38330Sstevel@tonic-gate 			udi_size += sizeof (struct T_opthdr) +
38340Sstevel@tonic-gate 			    ipp.ipp_rtdstoptslen;
38350Sstevel@tonic-gate 			icmp_opt |= IPPF_RTDSTOPTS;
38360Sstevel@tonic-gate 		}
38370Sstevel@tonic-gate 		if (icmp->icmp_ipv6_recvrthdr &&
38380Sstevel@tonic-gate 		    (ipp.ipp_fields & IPPF_RTHDR)) {
38390Sstevel@tonic-gate 			udi_size += sizeof (struct T_opthdr) +
38400Sstevel@tonic-gate 			    ipp.ipp_rthdrlen;
38410Sstevel@tonic-gate 			icmp_opt |= IPPF_RTHDR;
38420Sstevel@tonic-gate 		}
38433318Srshoaib 		if (icmp->icmp_ip_recvpktinfo &&
38440Sstevel@tonic-gate 		    (ipp.ipp_fields & IPPF_IFINDEX)) {
38450Sstevel@tonic-gate 			udi_size += sizeof (struct T_opthdr) +
38460Sstevel@tonic-gate 			    sizeof (struct in6_pktinfo);
38470Sstevel@tonic-gate 			icmp_opt |= IPPF_IFINDEX;
38480Sstevel@tonic-gate 		}
38490Sstevel@tonic-gate 	}
38500Sstevel@tonic-gate 	if (icmp->icmp_ipv6_recvhoplimit) {
38510Sstevel@tonic-gate 		udi_size += sizeof (struct T_opthdr) + sizeof (int);
38520Sstevel@tonic-gate 		icmp_ipv6_recvhoplimit = B_TRUE;
38530Sstevel@tonic-gate 	}
38540Sstevel@tonic-gate 
38550Sstevel@tonic-gate 	if (icmp->icmp_ipv6_recvtclass)
38560Sstevel@tonic-gate 		udi_size += sizeof (struct T_opthdr) + sizeof (int);
38570Sstevel@tonic-gate 
38585401Snordmark 	/*
38595401Snordmark 	 * If SO_TIMESTAMP is set allocate the appropriate sized
38605401Snordmark 	 * buffer. Since gethrestime() expects a pointer aligned
38615401Snordmark 	 * argument, we allocate space necessary for extra
38625401Snordmark 	 * alignment (even though it might not be used).
38635401Snordmark 	 */
38645401Snordmark 	if (icmp->icmp_timestamp) {
38655401Snordmark 		udi_size += sizeof (struct T_opthdr) +
38665401Snordmark 		    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
38675401Snordmark 	}
38685401Snordmark 
38690Sstevel@tonic-gate 	mp1 = allocb(udi_size, BPRI_MED);
38700Sstevel@tonic-gate 	if (mp1 == NULL) {
38710Sstevel@tonic-gate 		freemsg(mp);
38725240Snordmark 		BUMP_MIB(&is->is_rawip_mib, rawipInErrors);
38730Sstevel@tonic-gate 		return;
38740Sstevel@tonic-gate 	}
38750Sstevel@tonic-gate 	mp1->b_cont = mp;
38760Sstevel@tonic-gate 	mp = mp1;
38770Sstevel@tonic-gate 	mp->b_datap->db_type = M_PROTO;
38780Sstevel@tonic-gate 	tudi = (struct T_unitdata_ind *)mp->b_rptr;
38790Sstevel@tonic-gate 	mp->b_wptr = (uchar_t *)tudi + udi_size;
38800Sstevel@tonic-gate 	tudi->PRIM_type = T_UNITDATA_IND;
38810Sstevel@tonic-gate 	tudi->SRC_length = sizeof (sin6_t);
38820Sstevel@tonic-gate 	tudi->SRC_offset = sizeof (struct T_unitdata_ind);
38830Sstevel@tonic-gate 	tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
38840Sstevel@tonic-gate 	udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
38850Sstevel@tonic-gate 	tudi->OPT_length = udi_size;
38860Sstevel@tonic-gate 	sin6 = (sin6_t *)&tudi[1];
38870Sstevel@tonic-gate 	sin6->sin6_port = 0;
38880Sstevel@tonic-gate 	sin6->sin6_family = AF_INET6;
38890Sstevel@tonic-gate 
38900Sstevel@tonic-gate 	sin6->sin6_addr = ip6h->ip6_src;
38910Sstevel@tonic-gate 	/* No sin6_flowinfo per API */
38920Sstevel@tonic-gate 	sin6->sin6_flowinfo = 0;
38930Sstevel@tonic-gate 	/* For link-scope source pass up scope id */
38940Sstevel@tonic-gate 	if ((ipp.ipp_fields & IPPF_IFINDEX) &&
38950Sstevel@tonic-gate 	    IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
38960Sstevel@tonic-gate 		sin6->sin6_scope_id = ipp.ipp_ifindex;
38970Sstevel@tonic-gate 	else
38980Sstevel@tonic-gate 		sin6->sin6_scope_id = 0;
38990Sstevel@tonic-gate 
39000Sstevel@tonic-gate 	sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst,
39013448Sdh155122 	    icmp->icmp_zoneid, is->is_netstack);
39020Sstevel@tonic-gate 
39030Sstevel@tonic-gate 	if (udi_size != 0) {
39040Sstevel@tonic-gate 		uchar_t *dstopt;
39050Sstevel@tonic-gate 
39060Sstevel@tonic-gate 		dstopt = (uchar_t *)&sin6[1];
39070Sstevel@tonic-gate 		if (icmp_opt & IPPF_IFINDEX) {
39080Sstevel@tonic-gate 			struct T_opthdr *toh;
39090Sstevel@tonic-gate 			struct in6_pktinfo *pkti;
39100Sstevel@tonic-gate 
39110Sstevel@tonic-gate 			toh = (struct T_opthdr *)dstopt;
39120Sstevel@tonic-gate 			toh->level = IPPROTO_IPV6;
39130Sstevel@tonic-gate 			toh->name = IPV6_PKTINFO;
39140Sstevel@tonic-gate 			toh->len = sizeof (struct T_opthdr) +
39150Sstevel@tonic-gate 			    sizeof (*pkti);
39160Sstevel@tonic-gate 			toh->status = 0;
39170Sstevel@tonic-gate 			dstopt += sizeof (struct T_opthdr);
39180Sstevel@tonic-gate 			pkti = (struct in6_pktinfo *)dstopt;
39190Sstevel@tonic-gate 			pkti->ipi6_addr = ip6h->ip6_dst;
39200Sstevel@tonic-gate 			pkti->ipi6_ifindex = ipp.ipp_ifindex;
39210Sstevel@tonic-gate 			dstopt += sizeof (*pkti);
39220Sstevel@tonic-gate 			udi_size -= toh->len;
39230Sstevel@tonic-gate 		}
39240Sstevel@tonic-gate 		if (icmp_ipv6_recvhoplimit) {
39250Sstevel@tonic-gate 			struct T_opthdr *toh;
39260Sstevel@tonic-gate 
39270Sstevel@tonic-gate 			toh = (struct T_opthdr *)dstopt;
39280Sstevel@tonic-gate 			toh->level = IPPROTO_IPV6;
39290Sstevel@tonic-gate 			toh->name = IPV6_HOPLIMIT;
39300Sstevel@tonic-gate 			toh->len = sizeof (struct T_opthdr) +
39310Sstevel@tonic-gate 			    sizeof (uint_t);
39320Sstevel@tonic-gate 			toh->status = 0;
39330Sstevel@tonic-gate 			dstopt += sizeof (struct T_opthdr);
39340Sstevel@tonic-gate 			*(uint_t *)dstopt = ip6h->ip6_hops;
39350Sstevel@tonic-gate 			dstopt += sizeof (uint_t);
39360Sstevel@tonic-gate 			udi_size -= toh->len;
39370Sstevel@tonic-gate 		}
39380Sstevel@tonic-gate 		if (icmp->icmp_ipv6_recvtclass) {
39390Sstevel@tonic-gate 			struct T_opthdr *toh;
39400Sstevel@tonic-gate 
39410Sstevel@tonic-gate 			toh = (struct T_opthdr *)dstopt;
39420Sstevel@tonic-gate 			toh->level = IPPROTO_IPV6;
39430Sstevel@tonic-gate 			toh->name = IPV6_TCLASS;
39440Sstevel@tonic-gate 			toh->len = sizeof (struct T_opthdr) +
39450Sstevel@tonic-gate 			    sizeof (uint_t);
39460Sstevel@tonic-gate 			toh->status = 0;
39470Sstevel@tonic-gate 			dstopt += sizeof (struct T_opthdr);
39480Sstevel@tonic-gate 			*(uint_t *)dstopt = IPV6_FLOW_TCLASS(ip6h->ip6_flow);
39490Sstevel@tonic-gate 			dstopt += sizeof (uint_t);
39500Sstevel@tonic-gate 			udi_size -= toh->len;
39510Sstevel@tonic-gate 		}
39525401Snordmark 		if (icmp->icmp_timestamp) {
39538348SEric.Yu@Sun.COM 			struct  T_opthdr *toh;
39545401Snordmark 
39555401Snordmark 			toh = (struct T_opthdr *)dstopt;
39565401Snordmark 			toh->level = SOL_SOCKET;
39575401Snordmark 			toh->name = SCM_TIMESTAMP;
39585401Snordmark 			toh->len = sizeof (struct T_opthdr) +
39595401Snordmark 			    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
39605401Snordmark 			toh->status = 0;
39615401Snordmark 			dstopt += sizeof (struct T_opthdr);
39625401Snordmark 			/* Align for gethrestime() */
39635401Snordmark 			dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt,
39645401Snordmark 			    sizeof (intptr_t));
39655401Snordmark 			gethrestime((timestruc_t *)dstopt);
39665401Snordmark 			dstopt = (uchar_t *)toh + toh->len;
39675401Snordmark 			udi_size -= toh->len;
39685401Snordmark 		}
39698348SEric.Yu@Sun.COM 
39700Sstevel@tonic-gate 		if (icmp_opt & IPPF_HOPOPTS) {
39710Sstevel@tonic-gate 			struct T_opthdr *toh;
39720Sstevel@tonic-gate 
39730Sstevel@tonic-gate 			toh = (struct T_opthdr *)dstopt;
39740Sstevel@tonic-gate 			toh->level = IPPROTO_IPV6;
39750Sstevel@tonic-gate 			toh->name = IPV6_HOPOPTS;
39760Sstevel@tonic-gate 			toh->len = sizeof (struct T_opthdr) +
39771676Sjpk 			    ipp.ipp_hopoptslen - hopstrip;
39780Sstevel@tonic-gate 			toh->status = 0;
39790Sstevel@tonic-gate 			dstopt += sizeof (struct T_opthdr);
39801676Sjpk 			bcopy((char *)ipp.ipp_hopopts + hopstrip, dstopt,
39811676Sjpk 			    ipp.ipp_hopoptslen - hopstrip);
39821676Sjpk 			if (hopstrip > 0) {
39831676Sjpk 				/* copy next header value and fake length */
39841676Sjpk 				dstopt[0] = ((uchar_t *)ipp.ipp_hopopts)[0];
39851676Sjpk 				dstopt[1] = ((uchar_t *)ipp.ipp_hopopts)[1] -
39861676Sjpk 				    hopstrip / 8;
39871676Sjpk 			}
39881676Sjpk 			dstopt += ipp.ipp_hopoptslen - hopstrip;
39890Sstevel@tonic-gate 			udi_size -= toh->len;
39900Sstevel@tonic-gate 		}
39910Sstevel@tonic-gate 		if (icmp_opt & IPPF_RTDSTOPTS) {
39920Sstevel@tonic-gate 			struct T_opthdr *toh;
39930Sstevel@tonic-gate 
39940Sstevel@tonic-gate 			toh = (struct T_opthdr *)dstopt;
39950Sstevel@tonic-gate 			toh->level = IPPROTO_IPV6;
39960Sstevel@tonic-gate 			toh->name = IPV6_DSTOPTS;
39970Sstevel@tonic-gate 			toh->len = sizeof (struct T_opthdr) +
39980Sstevel@tonic-gate 			    ipp.ipp_rtdstoptslen;
39990Sstevel@tonic-gate 			toh->status = 0;
40000Sstevel@tonic-gate 			dstopt += sizeof (struct T_opthdr);
40010Sstevel@tonic-gate 			bcopy(ipp.ipp_rtdstopts, dstopt,
40020Sstevel@tonic-gate 			    ipp.ipp_rtdstoptslen);
40030Sstevel@tonic-gate 			dstopt += ipp.ipp_rtdstoptslen;
40040Sstevel@tonic-gate 			udi_size -= toh->len;
40050Sstevel@tonic-gate 		}
40060Sstevel@tonic-gate 		if (icmp_opt & IPPF_RTHDR) {
40070Sstevel@tonic-gate 			struct T_opthdr *toh;
40080Sstevel@tonic-gate 
40090Sstevel@tonic-gate 			toh = (struct T_opthdr *)dstopt;
40100Sstevel@tonic-gate 			toh->level = IPPROTO_IPV6;
40110Sstevel@tonic-gate 			toh->name = IPV6_RTHDR;
40120Sstevel@tonic-gate 			toh->len = sizeof (struct T_opthdr) +
40130Sstevel@tonic-gate 			    ipp.ipp_rthdrlen;
40140Sstevel@tonic-gate 			toh->status = 0;
40150Sstevel@tonic-gate 			dstopt += sizeof (struct T_opthdr);
40160Sstevel@tonic-gate 			bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen);
40170Sstevel@tonic-gate 			dstopt += ipp.ipp_rthdrlen;
40180Sstevel@tonic-gate 			udi_size -= toh->len;
40190Sstevel@tonic-gate 		}
40200Sstevel@tonic-gate 		if (icmp_opt & IPPF_DSTOPTS) {
40210Sstevel@tonic-gate 			struct T_opthdr *toh;
40220Sstevel@tonic-gate 
40230Sstevel@tonic-gate 			toh = (struct T_opthdr *)dstopt;
40240Sstevel@tonic-gate 			toh->level = IPPROTO_IPV6;
40250Sstevel@tonic-gate 			toh->name = IPV6_DSTOPTS;
40260Sstevel@tonic-gate 			toh->len = sizeof (struct T_opthdr) +
40270Sstevel@tonic-gate 			    ipp.ipp_dstoptslen;
40280Sstevel@tonic-gate 			toh->status = 0;
40290Sstevel@tonic-gate 			dstopt += sizeof (struct T_opthdr);
40300Sstevel@tonic-gate 			bcopy(ipp.ipp_dstopts, dstopt,
40310Sstevel@tonic-gate 			    ipp.ipp_dstoptslen);
40320Sstevel@tonic-gate 			dstopt += ipp.ipp_dstoptslen;
40330Sstevel@tonic-gate 			udi_size -= toh->len;
40340Sstevel@tonic-gate 		}
40350Sstevel@tonic-gate 		/* Consumed all of allocated space */
40360Sstevel@tonic-gate 		ASSERT(udi_size == 0);
40370Sstevel@tonic-gate 	}
40385240Snordmark 	BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams);
40398348SEric.Yu@Sun.COM 
40408348SEric.Yu@Sun.COM deliver:
40418348SEric.Yu@Sun.COM 	if (IPCL_IS_NONSTR(connp)) {
40428348SEric.Yu@Sun.COM 		if ((*connp->conn_upcalls->su_recv)
40438348SEric.Yu@Sun.COM 		    (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error,
40448348SEric.Yu@Sun.COM 		    NULL) < 0) {
40458348SEric.Yu@Sun.COM 			mutex_enter(&icmp->icmp_recv_lock);
40468348SEric.Yu@Sun.COM 			if (error == ENOSPC) {
40478348SEric.Yu@Sun.COM 				/*
40488348SEric.Yu@Sun.COM 				 * let's confirm while holding the lock
40498348SEric.Yu@Sun.COM 				 */
40508348SEric.Yu@Sun.COM 				if ((*connp->conn_upcalls->su_recv)
40518348SEric.Yu@Sun.COM 				    (connp->conn_upper_handle, NULL, 0, 0,
40528348SEric.Yu@Sun.COM 				    &error, NULL) < 0) {
40538348SEric.Yu@Sun.COM 					if (error == ENOSPC) {
40548348SEric.Yu@Sun.COM 						connp->conn_flow_cntrld =
40558348SEric.Yu@Sun.COM 						    B_TRUE;
40568348SEric.Yu@Sun.COM 					} else {
40578348SEric.Yu@Sun.COM 						ASSERT(error == EOPNOTSUPP);
40588348SEric.Yu@Sun.COM 					}
40590Sstevel@tonic-gate 				}
40608348SEric.Yu@Sun.COM 				mutex_exit(&icmp->icmp_recv_lock);
40618348SEric.Yu@Sun.COM 			} else {
40628348SEric.Yu@Sun.COM 				ASSERT(error == EOPNOTSUPP);
40638348SEric.Yu@Sun.COM 				icmp_queue_fallback(icmp, mp);
40640Sstevel@tonic-gate 			}
40650Sstevel@tonic-gate 		}
40668348SEric.Yu@Sun.COM 	} else {
40678348SEric.Yu@Sun.COM 		putnext(connp->conn_rq, mp);
40688348SEric.Yu@Sun.COM 	}
40698348SEric.Yu@Sun.COM 	ASSERT(MUTEX_NOT_HELD(&icmp->icmp_recv_lock));
40700Sstevel@tonic-gate }
40710Sstevel@tonic-gate 
40720Sstevel@tonic-gate /*
40730Sstevel@tonic-gate  * return SNMP stuff in buffer in mpdata
40740Sstevel@tonic-gate  */
40755240Snordmark mblk_t *
40760Sstevel@tonic-gate icmp_snmp_get(queue_t *q, mblk_t *mpctl)
40770Sstevel@tonic-gate {
40780Sstevel@tonic-gate 	mblk_t			*mpdata;
40790Sstevel@tonic-gate 	struct opthdr		*optp;
40805240Snordmark 	conn_t			*connp = Q_TO_CONN(q);
40815240Snordmark 	icmp_stack_t		*is = connp->conn_netstack->netstack_icmp;
40825240Snordmark 	mblk_t			*mp2ctl;
40835240Snordmark 
40845240Snordmark 	/*
40855240Snordmark 	 * make a copy of the original message
40865240Snordmark 	 */
40875240Snordmark 	mp2ctl = copymsg(mpctl);
40880Sstevel@tonic-gate 
40890Sstevel@tonic-gate 	if (mpctl == NULL ||
40900Sstevel@tonic-gate 	    (mpdata = mpctl->b_cont) == NULL) {
40915240Snordmark 		freemsg(mpctl);
40925240Snordmark 		freemsg(mp2ctl);
40930Sstevel@tonic-gate 		return (0);
40940Sstevel@tonic-gate 	}
40950Sstevel@tonic-gate 
40960Sstevel@tonic-gate 	/* fixed length structure for IPv4 and IPv6 counters */
40970Sstevel@tonic-gate 	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
40980Sstevel@tonic-gate 	optp->level = EXPER_RAWIP;
40990Sstevel@tonic-gate 	optp->name = 0;
41005240Snordmark 	(void) snmp_append_data(mpdata, (char *)&is->is_rawip_mib,
41015240Snordmark 	    sizeof (is->is_rawip_mib));
41020Sstevel@tonic-gate 	optp->len = msgdsize(mpdata);
41030Sstevel@tonic-gate 	qreply(q, mpctl);
41040Sstevel@tonic-gate 
41055240Snordmark 	return (mp2ctl);
41060Sstevel@tonic-gate }
41070Sstevel@tonic-gate 
41080Sstevel@tonic-gate /*
41090Sstevel@tonic-gate  * Return 0 if invalid set request, 1 otherwise, including non-rawip requests.
41100Sstevel@tonic-gate  * TODO:  If this ever actually tries to set anything, it needs to be
41110Sstevel@tonic-gate  * to do the appropriate locking.
41120Sstevel@tonic-gate  */
41130Sstevel@tonic-gate /* ARGSUSED */
41145240Snordmark int
41150Sstevel@tonic-gate icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
41160Sstevel@tonic-gate     uchar_t *ptr, int len)
41170Sstevel@tonic-gate {
41180Sstevel@tonic-gate 	switch (level) {
41190Sstevel@tonic-gate 	case EXPER_RAWIP:
41200Sstevel@tonic-gate 		return (0);
41210Sstevel@tonic-gate 	default:
41220Sstevel@tonic-gate 		return (1);
41230Sstevel@tonic-gate 	}
41240Sstevel@tonic-gate }
41250Sstevel@tonic-gate 
41260Sstevel@tonic-gate /* Report for ndd "icmp_status" */
41270Sstevel@tonic-gate /* ARGSUSED */
41280Sstevel@tonic-gate static int
41290Sstevel@tonic-gate icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
41300Sstevel@tonic-gate {
41315240Snordmark 	conn_t  *connp;
41325240Snordmark 	ip_stack_t *ipst;
41330Sstevel@tonic-gate 	char	laddrbuf[INET6_ADDRSTRLEN];
41340Sstevel@tonic-gate 	char	faddrbuf[INET6_ADDRSTRLEN];
41355240Snordmark 	int	i;
41360Sstevel@tonic-gate 
41370Sstevel@tonic-gate 	(void) mi_mpprintf(mp,
41380Sstevel@tonic-gate 	    "RAWIP    " MI_COL_HDRPAD_STR
41390Sstevel@tonic-gate 	/*   01234567[89ABCDEF] */
41400Sstevel@tonic-gate 	    "  src addr        dest addr       state");
41410Sstevel@tonic-gate 	/*   xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx UNBOUND */
41420Sstevel@tonic-gate 
41435240Snordmark 	connp = Q_TO_CONN(q);
41445240Snordmark 	ipst = connp->conn_netstack->netstack_ip;
41455240Snordmark 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
41465240Snordmark 		connf_t *connfp;
41475240Snordmark 		char	*state;
41485240Snordmark 
41495240Snordmark 		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
41505240Snordmark 		connp = NULL;
41515240Snordmark 
41525240Snordmark 		while ((connp = ipcl_get_next_conn(connfp, connp,
41535240Snordmark 		    IPCL_RAWIPCONN)) != NULL) {
41545240Snordmark 			icmp_t  *icmp;
41555240Snordmark 
41565240Snordmark 			mutex_enter(&(connp)->conn_lock);
41575240Snordmark 			icmp = connp->conn_icmp;
41585240Snordmark 
41595240Snordmark 			if (icmp->icmp_state == TS_UNBND)
41605240Snordmark 				state = "UNBOUND";
41615240Snordmark 			else if (icmp->icmp_state == TS_IDLE)
41625240Snordmark 				state = "IDLE";
41635240Snordmark 			else if (icmp->icmp_state == TS_DATA_XFER)
41645240Snordmark 				state = "CONNECTED";
41655240Snordmark 			else
41665240Snordmark 				state = "UnkState";
41675240Snordmark 
41685240Snordmark 			(void) mi_mpprintf(mp, MI_COL_PTRFMT_STR "%s %s %s",
41695240Snordmark 			    (void *)icmp,
41708348SEric.Yu@Sun.COM 			    inet_ntop(AF_INET6, &icmp->icmp_v6dst.sin6_addr,
41718348SEric.Yu@Sun.COM 			    faddrbuf,
41725240Snordmark 			    sizeof (faddrbuf)),
41735240Snordmark 			    inet_ntop(AF_INET6, &icmp->icmp_v6src, laddrbuf,
41745240Snordmark 			    sizeof (laddrbuf)),
41755240Snordmark 			    state);
41765240Snordmark 			mutex_exit(&(connp)->conn_lock);
41775240Snordmark 		}
41780Sstevel@tonic-gate 	}
41790Sstevel@tonic-gate 	return (0);
41800Sstevel@tonic-gate }
41810Sstevel@tonic-gate 
41820Sstevel@tonic-gate /*
41830Sstevel@tonic-gate  * This routine creates a T_UDERROR_IND message and passes it upstream.
41840Sstevel@tonic-gate  * The address and options are copied from the T_UNITDATA_REQ message
41850Sstevel@tonic-gate  * passed in mp.  This message is freed.
41860Sstevel@tonic-gate  */
41870Sstevel@tonic-gate static void
41880Sstevel@tonic-gate icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err)
41890Sstevel@tonic-gate {
41900Sstevel@tonic-gate 	mblk_t	*mp1;
41910Sstevel@tonic-gate 	uchar_t	*rptr = mp->b_rptr;
41920Sstevel@tonic-gate 	struct T_unitdata_req *tudr = (struct T_unitdata_req *)rptr;
41930Sstevel@tonic-gate 
41940Sstevel@tonic-gate 	mp1 = mi_tpi_uderror_ind((char *)&rptr[tudr->DEST_offset],
41950Sstevel@tonic-gate 	    tudr->DEST_length, (char *)&rptr[tudr->OPT_offset],
41960Sstevel@tonic-gate 	    tudr->OPT_length, err);
41970Sstevel@tonic-gate 	if (mp1)
41980Sstevel@tonic-gate 		qreply(q, mp1);
41990Sstevel@tonic-gate 	freemsg(mp);
42000Sstevel@tonic-gate }
42010Sstevel@tonic-gate 
42028348SEric.Yu@Sun.COM 
42038348SEric.Yu@Sun.COM static int
42048348SEric.Yu@Sun.COM rawip_do_unbind(conn_t *connp)
42050Sstevel@tonic-gate {
42068348SEric.Yu@Sun.COM 	icmp_t *icmp = connp->conn_icmp;
42075240Snordmark 
42085240Snordmark 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
42090Sstevel@tonic-gate 	/* If a bind has not been done, we can't unbind. */
42105240Snordmark 	if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) {
42115240Snordmark 		rw_exit(&icmp->icmp_rwlock);
42128348SEric.Yu@Sun.COM 		return (-TOUTSTATE);
42130Sstevel@tonic-gate 	}
42145240Snordmark 	icmp->icmp_pending_op = T_UNBIND_REQ;
42155240Snordmark 	rw_exit(&icmp->icmp_rwlock);
42165240Snordmark 
42175240Snordmark 	/*
42188348SEric.Yu@Sun.COM 	 * Call ip to unbind
42195240Snordmark 	 */
42208348SEric.Yu@Sun.COM 
42218348SEric.Yu@Sun.COM 	ip_unbind(connp);
42225240Snordmark 
42235240Snordmark 	/*
42245240Snordmark 	 * Once we're unbound from IP, the pending operation may be cleared
42255240Snordmark 	 * here.
42265240Snordmark 	 */
42275240Snordmark 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
42280Sstevel@tonic-gate 	V6_SET_ZERO(icmp->icmp_v6src);
42290Sstevel@tonic-gate 	V6_SET_ZERO(icmp->icmp_bound_v6src);
42305240Snordmark 	icmp->icmp_pending_op = -1;
42310Sstevel@tonic-gate 	icmp->icmp_state = TS_UNBND;
42325240Snordmark 	if (icmp->icmp_family == AF_INET6)
42335240Snordmark 		(void) icmp_build_hdrs(icmp);
42345240Snordmark 	rw_exit(&icmp->icmp_rwlock);
42358348SEric.Yu@Sun.COM 	return (0);
42368348SEric.Yu@Sun.COM }
42378348SEric.Yu@Sun.COM 
42388348SEric.Yu@Sun.COM /*
42398348SEric.Yu@Sun.COM  * This routine is called by icmp_wput to handle T_UNBIND_REQ messages.
42408348SEric.Yu@Sun.COM  * After some error checking, the message is passed downstream to ip.
42418348SEric.Yu@Sun.COM  */
42428348SEric.Yu@Sun.COM static void
42438348SEric.Yu@Sun.COM icmp_tpi_unbind(queue_t *q, mblk_t *mp)
42448348SEric.Yu@Sun.COM {
42458348SEric.Yu@Sun.COM 	conn_t	*connp = Q_TO_CONN(q);
42468348SEric.Yu@Sun.COM 	int	error;
42478348SEric.Yu@Sun.COM 
42488348SEric.Yu@Sun.COM 	ASSERT(mp->b_cont == NULL);
42498348SEric.Yu@Sun.COM 	error = rawip_do_unbind(connp);
42508348SEric.Yu@Sun.COM 	if (error) {
42518348SEric.Yu@Sun.COM 		if (error < 0) {
42528348SEric.Yu@Sun.COM 			icmp_err_ack(q, mp, -error, 0);
42538348SEric.Yu@Sun.COM 		} else {
42548348SEric.Yu@Sun.COM 			icmp_err_ack(q, mp, 0, error);
42558348SEric.Yu@Sun.COM 		}
42568348SEric.Yu@Sun.COM 		return;
42578348SEric.Yu@Sun.COM 	}
42588348SEric.Yu@Sun.COM 
42598348SEric.Yu@Sun.COM 	/*
42608348SEric.Yu@Sun.COM 	 * Convert mp into a T_OK_ACK
42618348SEric.Yu@Sun.COM 	 */
42628348SEric.Yu@Sun.COM 
42638348SEric.Yu@Sun.COM 	mp = mi_tpi_ok_ack_alloc(mp);
42648348SEric.Yu@Sun.COM 
42658348SEric.Yu@Sun.COM 	/*
42668348SEric.Yu@Sun.COM 	 * should not happen in practice... T_OK_ACK is smaller than the
42678348SEric.Yu@Sun.COM 	 * original message.
42688348SEric.Yu@Sun.COM 	 */
42698348SEric.Yu@Sun.COM 	ASSERT(mp != NULL);
42708348SEric.Yu@Sun.COM 	ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK);
42715240Snordmark 	qreply(q, mp);
42720Sstevel@tonic-gate }
42730Sstevel@tonic-gate 
42748348SEric.Yu@Sun.COM 
42750Sstevel@tonic-gate /*
42760Sstevel@tonic-gate  * Process IPv4 packets that already include an IP header.
42770Sstevel@tonic-gate  * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and
42780Sstevel@tonic-gate  * IPPROTO_IGMP).
42790Sstevel@tonic-gate  */
42808348SEric.Yu@Sun.COM static int
42818348SEric.Yu@Sun.COM icmp_wput_hdrincl(queue_t *q, conn_t *connp, mblk_t *mp, icmp_t *icmp,
42828348SEric.Yu@Sun.COM     ip4_pkt_t *pktinfop)
42830Sstevel@tonic-gate {
42843448Sdh155122 	icmp_stack_t *is = icmp->icmp_is;
42850Sstevel@tonic-gate 	ipha_t	*ipha;
42860Sstevel@tonic-gate 	int	ip_hdr_length;
42870Sstevel@tonic-gate 	int	tp_hdr_len;
42880Sstevel@tonic-gate 	mblk_t	*mp1;
42890Sstevel@tonic-gate 	uint_t	pkt_len;
42903318Srshoaib 	ip_opt_info_t optinfo;
42913318Srshoaib 
42923318Srshoaib 	optinfo.ip_opt_flags = 0;
42933318Srshoaib 	optinfo.ip_opt_ill_index = 0;
42940Sstevel@tonic-gate 	ipha = (ipha_t *)mp->b_rptr;
42950Sstevel@tonic-gate 	ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len;
42960Sstevel@tonic-gate 	if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) {
42970Sstevel@tonic-gate 		if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) {
42983448Sdh155122 			ASSERT(icmp != NULL);
42995240Snordmark 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
43000Sstevel@tonic-gate 			freemsg(mp);
43018348SEric.Yu@Sun.COM 			return (0);
43020Sstevel@tonic-gate 		}
43030Sstevel@tonic-gate 		ipha = (ipha_t *)mp->b_rptr;
43040Sstevel@tonic-gate 	}
43050Sstevel@tonic-gate 	ipha->ipha_version_and_hdr_length =
43060Sstevel@tonic-gate 	    (IP_VERSION<<4) | (ip_hdr_length>>2);
43070Sstevel@tonic-gate 
43080Sstevel@tonic-gate 	/*
43090Sstevel@tonic-gate 	 * For the socket of SOCK_RAW type, the checksum is provided in the
43100Sstevel@tonic-gate 	 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to
43110Sstevel@tonic-gate 	 * tell IP that the application has sent a complete IP header and not
43120Sstevel@tonic-gate 	 * to compute the transport checksum nor change the DF flag.
43130Sstevel@tonic-gate 	 */
43140Sstevel@tonic-gate 	ipha->ipha_ident = IP_HDR_INCLUDED;
43150Sstevel@tonic-gate 	ipha->ipha_hdr_checksum = 0;
43160Sstevel@tonic-gate 	ipha->ipha_fragment_offset_and_flags &= htons(IPH_DF);
43170Sstevel@tonic-gate 	/* Insert options if any */
43180Sstevel@tonic-gate 	if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) {
43190Sstevel@tonic-gate 		/*
43200Sstevel@tonic-gate 		 * Put the IP header plus any transport header that is
43210Sstevel@tonic-gate 		 * checksumed by ip_wput into the first mblk. (ip_wput assumes
43220Sstevel@tonic-gate 		 * that at least the checksum field is in the first mblk.)
43230Sstevel@tonic-gate 		 */
43240Sstevel@tonic-gate 		switch (ipha->ipha_protocol) {
43250Sstevel@tonic-gate 		case IPPROTO_UDP:
43260Sstevel@tonic-gate 			tp_hdr_len = 8;
43270Sstevel@tonic-gate 			break;
43280Sstevel@tonic-gate 		case IPPROTO_TCP:
43290Sstevel@tonic-gate 			tp_hdr_len = 20;
43300Sstevel@tonic-gate 			break;
43310Sstevel@tonic-gate 		default:
43320Sstevel@tonic-gate 			tp_hdr_len = 0;
43330Sstevel@tonic-gate 			break;
43340Sstevel@tonic-gate 		}
43350Sstevel@tonic-gate 		/*
43360Sstevel@tonic-gate 		 * The code below assumes that IP_SIMPLE_HDR_LENGTH plus
43370Sstevel@tonic-gate 		 * tp_hdr_len bytes will be in a single mblk.
43380Sstevel@tonic-gate 		 */
43390Sstevel@tonic-gate 		if ((mp->b_wptr - mp->b_rptr) < (IP_SIMPLE_HDR_LENGTH +
43400Sstevel@tonic-gate 		    tp_hdr_len)) {
43410Sstevel@tonic-gate 			if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH +
43420Sstevel@tonic-gate 			    tp_hdr_len)) {
43435240Snordmark 				BUMP_MIB(&is->is_rawip_mib,
43443448Sdh155122 				    rawipOutErrors);
43450Sstevel@tonic-gate 				freemsg(mp);
43468348SEric.Yu@Sun.COM 				return (0);
43470Sstevel@tonic-gate 			}
43480Sstevel@tonic-gate 			ipha = (ipha_t *)mp->b_rptr;
43490Sstevel@tonic-gate 		}
43500Sstevel@tonic-gate 
43510Sstevel@tonic-gate 		/*
43520Sstevel@tonic-gate 		 * if the length is larger then the max allowed IP packet,
43530Sstevel@tonic-gate 		 * then send an error and abort the processing.
43540Sstevel@tonic-gate 		 */
43550Sstevel@tonic-gate 		pkt_len = ntohs(ipha->ipha_length)
43560Sstevel@tonic-gate 		    + icmp->icmp_ip_snd_options_len;
43570Sstevel@tonic-gate 		if (pkt_len > IP_MAXPACKET) {
43588348SEric.Yu@Sun.COM 			return (EMSGSIZE);
43590Sstevel@tonic-gate 		}
43603448Sdh155122 		if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra +
43610Sstevel@tonic-gate 		    tp_hdr_len, BPRI_LO))) {
43628348SEric.Yu@Sun.COM 			return (ENOMEM);
43630Sstevel@tonic-gate 		}
43643448Sdh155122 		mp1->b_rptr += is->is_wroff_extra;
43650Sstevel@tonic-gate 		mp1->b_wptr = mp1->b_rptr + ip_hdr_length;
43660Sstevel@tonic-gate 
43670Sstevel@tonic-gate 		ipha->ipha_length = htons((uint16_t)pkt_len);
43680Sstevel@tonic-gate 		bcopy(ipha, mp1->b_rptr, IP_SIMPLE_HDR_LENGTH);
43690Sstevel@tonic-gate 
43700Sstevel@tonic-gate 		/* Copy transport header if any */
43710Sstevel@tonic-gate 		bcopy(&ipha[1], mp1->b_wptr, tp_hdr_len);
43720Sstevel@tonic-gate 		mp1->b_wptr += tp_hdr_len;
43730Sstevel@tonic-gate 
43740Sstevel@tonic-gate 		/* Add options */
43750Sstevel@tonic-gate 		ipha = (ipha_t *)mp1->b_rptr;
43760Sstevel@tonic-gate 		bcopy(icmp->icmp_ip_snd_options, &ipha[1],
43770Sstevel@tonic-gate 		    icmp->icmp_ip_snd_options_len);
43780Sstevel@tonic-gate 
43790Sstevel@tonic-gate 		/* Drop IP header and transport header from original */
43800Sstevel@tonic-gate 		(void) adjmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len);
43810Sstevel@tonic-gate 
43820Sstevel@tonic-gate 		mp1->b_cont = mp;
43830Sstevel@tonic-gate 		mp = mp1;
43840Sstevel@tonic-gate 		/*
43850Sstevel@tonic-gate 		 * Massage source route putting first source
43860Sstevel@tonic-gate 		 * route in ipha_dst.
43870Sstevel@tonic-gate 		 */
43885240Snordmark 		(void) ip_massage_options(ipha, is->is_netstack);
43890Sstevel@tonic-gate 	}
43903318Srshoaib 
43913318Srshoaib 	if (pktinfop != NULL) {
43923318Srshoaib 		/*
43933318Srshoaib 		 * Over write the source address provided in the header
43943318Srshoaib 		 */
43953318Srshoaib 		if (pktinfop->ip4_addr != INADDR_ANY) {
43963318Srshoaib 			ipha->ipha_src = pktinfop->ip4_addr;
43973318Srshoaib 			optinfo.ip_opt_flags = IP_VERIFY_SRC;
43983318Srshoaib 		}
43993318Srshoaib 
44003318Srshoaib 		if (pktinfop->ip4_ill_index != 0) {
44013318Srshoaib 			optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index;
44023318Srshoaib 		}
44033318Srshoaib 	}
44043318Srshoaib 
44058275SEric Cheng 	ip_output_options(connp, mp, q, IP_WPUT, &optinfo);
44068348SEric.Yu@Sun.COM 	return (0);
44070Sstevel@tonic-gate }
44080Sstevel@tonic-gate 
44098348SEric.Yu@Sun.COM static int
44108348SEric.Yu@Sun.COM icmp_update_label(icmp_t *icmp, mblk_t *mp, ipaddr_t dst)
44111676Sjpk {
44121676Sjpk 	int err;
44131676Sjpk 	uchar_t opt_storage[IP_MAX_OPT_LENGTH];
44145240Snordmark 	icmp_stack_t		*is = icmp->icmp_is;
4415*8778SErik.Nordmark@Sun.COM 	conn_t			*connp = icmp->icmp_connp;
4416*8778SErik.Nordmark@Sun.COM 	cred_t			*cr;
4417*8778SErik.Nordmark@Sun.COM 
4418*8778SErik.Nordmark@Sun.COM 	/*
4419*8778SErik.Nordmark@Sun.COM 	 * All Solaris components should pass a db_credp
4420*8778SErik.Nordmark@Sun.COM 	 * for this message, hence we ASSERT.
4421*8778SErik.Nordmark@Sun.COM 	 * On production kernels we return an error to be robust against
4422*8778SErik.Nordmark@Sun.COM 	 * random streams modules sitting on top of us.
4423*8778SErik.Nordmark@Sun.COM 	 */
4424*8778SErik.Nordmark@Sun.COM 	cr = msg_getcred(mp, NULL);
4425*8778SErik.Nordmark@Sun.COM 	ASSERT(cr != NULL);
4426*8778SErik.Nordmark@Sun.COM 	if (cr == NULL)
4427*8778SErik.Nordmark@Sun.COM 		return (EINVAL);
4428*8778SErik.Nordmark@Sun.COM 
4429*8778SErik.Nordmark@Sun.COM 	err = tsol_compute_label(cr, dst,
44306596Skp158701 	    opt_storage, connp->conn_mac_exempt,
44315240Snordmark 	    is->is_netstack->netstack_ip);
44321676Sjpk 	if (err == 0) {
44331676Sjpk 		err = tsol_update_options(&icmp->icmp_ip_snd_options,
44341676Sjpk 		    &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len,
44351676Sjpk 		    opt_storage);
44361676Sjpk 	}
44371676Sjpk 	if (err != 0) {
44385240Snordmark 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
44391676Sjpk 		DTRACE_PROBE4(
44401676Sjpk 		    tx__ip__log__drop__updatelabel__icmp,
44418348SEric.Yu@Sun.COM 		    char *, "icmp(1) failed to update options(2) on mp(3)",
44428348SEric.Yu@Sun.COM 		    icmp_t *, icmp, char *, opt_storage, mblk_t *, mp);
44438348SEric.Yu@Sun.COM 		return (err);
44441676Sjpk 	}
44451676Sjpk 	IN6_IPADDR_TO_V4MAPPED(dst, &icmp->icmp_v6lastdst);
44468348SEric.Yu@Sun.COM 	return (0);
44471676Sjpk }
44481676Sjpk 
44490Sstevel@tonic-gate /*
44500Sstevel@tonic-gate  * This routine handles all messages passed downstream.  It either
44510Sstevel@tonic-gate  * consumes the message or passes it downstream; it never queues a
44520Sstevel@tonic-gate  * a message.
44530Sstevel@tonic-gate  */
44540Sstevel@tonic-gate static void
44550Sstevel@tonic-gate icmp_wput(queue_t *q, mblk_t *mp)
44560Sstevel@tonic-gate {
44570Sstevel@tonic-gate 	uchar_t	*rptr = mp->b_rptr;
44580Sstevel@tonic-gate 	ipha_t	*ipha;
44590Sstevel@tonic-gate 	mblk_t	*mp1;
44600Sstevel@tonic-gate #define	tudr ((struct T_unitdata_req *)rptr)
44610Sstevel@tonic-gate 	size_t	ip_len;
44625240Snordmark 	conn_t	*connp = Q_TO_CONN(q);
44635240Snordmark 	icmp_t	*icmp = connp->conn_icmp;
44643448Sdh155122 	icmp_stack_t *is = icmp->icmp_is;
44650Sstevel@tonic-gate 	sin6_t	*sin6;
44660Sstevel@tonic-gate 	sin_t	*sin;
44670Sstevel@tonic-gate 	ipaddr_t	v4dst;
44683318Srshoaib 	ip4_pkt_t	pktinfo;
44693318Srshoaib 	ip4_pkt_t	*pktinfop = &pktinfo;
44708348SEric.Yu@Sun.COM 	ip6_pkt_t	ipp_s;  /* For ancillary data options */
44718348SEric.Yu@Sun.COM 	ip6_pkt_t	*ipp = &ipp_s;
44728348SEric.Yu@Sun.COM 	int error;
44738348SEric.Yu@Sun.COM 
44748348SEric.Yu@Sun.COM 	ipp->ipp_fields = 0;
44758348SEric.Yu@Sun.COM 	ipp->ipp_sticky_ignored = 0;
44760Sstevel@tonic-gate 
44770Sstevel@tonic-gate 	switch (mp->b_datap->db_type) {
44780Sstevel@tonic-gate 	case M_DATA:
44790Sstevel@tonic-gate 		if (icmp->icmp_hdrincl) {
44800Sstevel@tonic-gate 			ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
44811676Sjpk 			ipha = (ipha_t *)mp->b_rptr;
44821676Sjpk 			if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) {
44831676Sjpk 				if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) {
44845240Snordmark 					BUMP_MIB(&is->is_rawip_mib,
44853448Sdh155122 					    rawipOutErrors);
44861676Sjpk 					freemsg(mp);
44871676Sjpk 					return;
44881676Sjpk 				}
44891676Sjpk 				ipha = (ipha_t *)mp->b_rptr;
44901676Sjpk 			}
44911676Sjpk 			/*
44921676Sjpk 			 * If this connection was used for v6 (inconceivable!)
44931676Sjpk 			 * or if we have a new destination, then it's time to
44941676Sjpk 			 * figure a new label.
44951676Sjpk 			 */
44961676Sjpk 			if (is_system_labeled() &&
44971676Sjpk 			    (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) ||
44981676Sjpk 			    V4_PART_OF_V6(icmp->icmp_v6lastdst) !=
44998348SEric.Yu@Sun.COM 			    ipha->ipha_dst)) {
45008348SEric.Yu@Sun.COM 				error = icmp_update_label(icmp, mp,
45018348SEric.Yu@Sun.COM 				    ipha->ipha_dst);
45028348SEric.Yu@Sun.COM 				if (error != 0) {
45038348SEric.Yu@Sun.COM 					icmp_ud_err(q, mp, error);
45048348SEric.Yu@Sun.COM 					return;
45058348SEric.Yu@Sun.COM 				}
45061676Sjpk 			}
45078348SEric.Yu@Sun.COM 			error = icmp_wput_hdrincl(q, connp, mp, icmp, NULL);
45088348SEric.Yu@Sun.COM 			if (error != 0)
45098348SEric.Yu@Sun.COM 				icmp_ud_err(q, mp, error);
45100Sstevel@tonic-gate 			return;
45110Sstevel@tonic-gate 		}
45120Sstevel@tonic-gate 		freemsg(mp);
45130Sstevel@tonic-gate 		return;
45140Sstevel@tonic-gate 	case M_PROTO:
45150Sstevel@tonic-gate 	case M_PCPROTO:
45160Sstevel@tonic-gate 		ip_len = mp->b_wptr - rptr;
45170Sstevel@tonic-gate 		if (ip_len >= sizeof (struct T_unitdata_req)) {
45180Sstevel@tonic-gate 			/* Expedite valid T_UNITDATA_REQ to below the switch */
45190Sstevel@tonic-gate 			if (((union T_primitives *)rptr)->type
45200Sstevel@tonic-gate 			    == T_UNITDATA_REQ)
45210Sstevel@tonic-gate 				break;
45220Sstevel@tonic-gate 		}
45230Sstevel@tonic-gate 		/* FALLTHRU */
45240Sstevel@tonic-gate 	default:
45250Sstevel@tonic-gate 		icmp_wput_other(q, mp);
45260Sstevel@tonic-gate 		return;
45270Sstevel@tonic-gate 	}
45280Sstevel@tonic-gate 
45290Sstevel@tonic-gate 	/* Handle T_UNITDATA_REQ messages here. */
45300Sstevel@tonic-gate 
45310Sstevel@tonic-gate 	mp1 = mp->b_cont;
45320Sstevel@tonic-gate 	if (mp1 == NULL) {
45335240Snordmark 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
45340Sstevel@tonic-gate 		icmp_ud_err(q, mp, EPROTO);
45350Sstevel@tonic-gate 		return;
45360Sstevel@tonic-gate 	}
45370Sstevel@tonic-gate 
45380Sstevel@tonic-gate 	if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) {
45395240Snordmark 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
45400Sstevel@tonic-gate 		icmp_ud_err(q, mp, EADDRNOTAVAIL);
45410Sstevel@tonic-gate 		return;
45420Sstevel@tonic-gate 	}
45430Sstevel@tonic-gate 
45440Sstevel@tonic-gate 	switch (icmp->icmp_family) {
45450Sstevel@tonic-gate 	case AF_INET6:
45460Sstevel@tonic-gate 		sin6 = (sin6_t *)&rptr[tudr->DEST_offset];
45470Sstevel@tonic-gate 		if (!OK_32PTR((char *)sin6) ||
45480Sstevel@tonic-gate 		    tudr->DEST_length != sizeof (sin6_t) ||
45490Sstevel@tonic-gate 		    sin6->sin6_family != AF_INET6) {
45505240Snordmark 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
45510Sstevel@tonic-gate 			icmp_ud_err(q, mp, EADDRNOTAVAIL);
45520Sstevel@tonic-gate 			return;
45530Sstevel@tonic-gate 		}
45540Sstevel@tonic-gate 
45550Sstevel@tonic-gate 		/* No support for mapped addresses on raw sockets */
45560Sstevel@tonic-gate 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
45575240Snordmark 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
45580Sstevel@tonic-gate 			icmp_ud_err(q, mp, EADDRNOTAVAIL);
45590Sstevel@tonic-gate 			return;
45600Sstevel@tonic-gate 		}
45610Sstevel@tonic-gate 
45620Sstevel@tonic-gate 		/*
45630Sstevel@tonic-gate 		 * Destination is a native IPv6 address.
45640Sstevel@tonic-gate 		 * Send out an IPv6 format packet.
45650Sstevel@tonic-gate 		 */
45668348SEric.Yu@Sun.COM 		if (tudr->OPT_length != 0) {
45678348SEric.Yu@Sun.COM 			int error;
45688348SEric.Yu@Sun.COM 
45698348SEric.Yu@Sun.COM 			error = 0;
45708348SEric.Yu@Sun.COM 			if (icmp_unitdata_opt_process(q, mp, &error,
45718348SEric.Yu@Sun.COM 			    (void *)ipp) < 0) {
45728348SEric.Yu@Sun.COM 				/* failure */
45738348SEric.Yu@Sun.COM 				BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
45748348SEric.Yu@Sun.COM 				icmp_ud_err(q, mp, error);
45758348SEric.Yu@Sun.COM 				return;
45768348SEric.Yu@Sun.COM 			}
45778348SEric.Yu@Sun.COM 			ASSERT(error == 0);
45788348SEric.Yu@Sun.COM 		}
45798348SEric.Yu@Sun.COM 
45808348SEric.Yu@Sun.COM 		error = raw_ip_send_data_v6(q, connp, mp1, sin6, ipp);
45818348SEric.Yu@Sun.COM 		goto done;
45820Sstevel@tonic-gate 
45830Sstevel@tonic-gate 	case AF_INET:
45840Sstevel@tonic-gate 		sin = (sin_t *)&rptr[tudr->DEST_offset];
45850Sstevel@tonic-gate 		if (!OK_32PTR((char *)sin) ||
45860Sstevel@tonic-gate 		    tudr->DEST_length != sizeof (sin_t) ||
45870Sstevel@tonic-gate 		    sin->sin_family != AF_INET) {
45885240Snordmark 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
45890Sstevel@tonic-gate 			icmp_ud_err(q, mp, EADDRNOTAVAIL);
45900Sstevel@tonic-gate 			return;
45910Sstevel@tonic-gate 		}
45920Sstevel@tonic-gate 		/* Extract and ipaddr */
45930Sstevel@tonic-gate 		v4dst = sin->sin_addr.s_addr;
45940Sstevel@tonic-gate 		break;
45951676Sjpk 
45961676Sjpk 	default:
45971676Sjpk 		ASSERT(0);
45980Sstevel@tonic-gate 	}
45990Sstevel@tonic-gate 
46003318Srshoaib 	pktinfop->ip4_ill_index = 0;
46013318Srshoaib 	pktinfop->ip4_addr = INADDR_ANY;
46023318Srshoaib 
46030Sstevel@tonic-gate 	/*
46040Sstevel@tonic-gate 	 * If options passed in, feed it for verification and handling
46050Sstevel@tonic-gate 	 */
46060Sstevel@tonic-gate 	if (tudr->OPT_length != 0) {
46070Sstevel@tonic-gate 		int error;
46080Sstevel@tonic-gate 
46093318Srshoaib 		error = 0;
46100Sstevel@tonic-gate 		if (icmp_unitdata_opt_process(q, mp, &error,
46113318Srshoaib 		    (void *)pktinfop) < 0) {
46120Sstevel@tonic-gate 			/* failure */
46135240Snordmark 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
46140Sstevel@tonic-gate 			icmp_ud_err(q, mp, error);
46150Sstevel@tonic-gate 			return;
46160Sstevel@tonic-gate 		}
46173318Srshoaib 		ASSERT(error == 0);
46180Sstevel@tonic-gate 		/*
46190Sstevel@tonic-gate 		 * Note: Success in processing options.
46200Sstevel@tonic-gate 		 * mp option buffer represented by
46210Sstevel@tonic-gate 		 * OPT_length/offset now potentially modified
46220Sstevel@tonic-gate 		 * and contain option setting results
46230Sstevel@tonic-gate 		 */
46248348SEric.Yu@Sun.COM 	}
46258348SEric.Yu@Sun.COM 
46268348SEric.Yu@Sun.COM 	error = raw_ip_send_data_v4(q, connp, mp1, v4dst, pktinfop);
46278348SEric.Yu@Sun.COM done:
46288348SEric.Yu@Sun.COM 	if (error != 0) {
46298348SEric.Yu@Sun.COM 		icmp_ud_err(q, mp, error);
46308348SEric.Yu@Sun.COM 		return;
46318348SEric.Yu@Sun.COM 	} else {
46328348SEric.Yu@Sun.COM 		mp->b_cont = NULL;
46338348SEric.Yu@Sun.COM 		freeb(mp);
46348348SEric.Yu@Sun.COM 	}
46358348SEric.Yu@Sun.COM }
46368348SEric.Yu@Sun.COM 
46378348SEric.Yu@Sun.COM 
46388348SEric.Yu@Sun.COM /* ARGSUSED */
46398348SEric.Yu@Sun.COM static void
46408348SEric.Yu@Sun.COM icmp_wput_fallback(queue_t *q, mblk_t *mp)
46418348SEric.Yu@Sun.COM {
46428348SEric.Yu@Sun.COM #ifdef DEBUG
46438348SEric.Yu@Sun.COM 	cmn_err(CE_CONT, "icmp_wput_fallback: Message during fallback \n");
46448348SEric.Yu@Sun.COM #endif
46458348SEric.Yu@Sun.COM 	freemsg(mp);
46468348SEric.Yu@Sun.COM }
46478348SEric.Yu@Sun.COM 
46488348SEric.Yu@Sun.COM static int
46498348SEric.Yu@Sun.COM raw_ip_send_data_v4(queue_t *q, conn_t *connp, mblk_t *mp, ipaddr_t v4dst,
46508348SEric.Yu@Sun.COM     ip4_pkt_t *pktinfop)
46518348SEric.Yu@Sun.COM {
46528348SEric.Yu@Sun.COM 	ipha_t	*ipha;
46538348SEric.Yu@Sun.COM 	size_t	ip_len;
46548348SEric.Yu@Sun.COM 	icmp_t	*icmp = connp->conn_icmp;
46558348SEric.Yu@Sun.COM 	icmp_stack_t *is = icmp->icmp_is;
46568348SEric.Yu@Sun.COM 	int	ip_hdr_length;
46578348SEric.Yu@Sun.COM 	ip_opt_info_t	optinfo;
46588348SEric.Yu@Sun.COM 
46598348SEric.Yu@Sun.COM 	optinfo.ip_opt_flags = 0;
46608348SEric.Yu@Sun.COM 	optinfo.ip_opt_ill_index = 0;
46618348SEric.Yu@Sun.COM 
46628348SEric.Yu@Sun.COM 	if (icmp->icmp_state == TS_UNBND) {
46638348SEric.Yu@Sun.COM 		/* If a port has not been bound to the stream, fail. */
46648348SEric.Yu@Sun.COM 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
46658348SEric.Yu@Sun.COM 		return (EPROTO);
46660Sstevel@tonic-gate 	}
46670Sstevel@tonic-gate 
46681676Sjpk 	if (v4dst == INADDR_ANY)
46691676Sjpk 		v4dst = htonl(INADDR_LOOPBACK);
46701676Sjpk 
46711676Sjpk 	/* Check if our saved options are valid; update if not */
46721676Sjpk 	if (is_system_labeled() &&
46731676Sjpk 	    (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) ||
46748348SEric.Yu@Sun.COM 	    V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst)) {
46758348SEric.Yu@Sun.COM 		int error = icmp_update_label(icmp, mp, v4dst);
46768348SEric.Yu@Sun.COM 
46778348SEric.Yu@Sun.COM 		if (error != 0)
46788348SEric.Yu@Sun.COM 			return (error);
46791676Sjpk 	}
46801676Sjpk 
46810Sstevel@tonic-gate 	/* Protocol 255 contains full IP headers */
46828348SEric.Yu@Sun.COM 	if (icmp->icmp_hdrincl)
46838348SEric.Yu@Sun.COM 		return (icmp_wput_hdrincl(q, connp, mp, icmp, pktinfop));
46843318Srshoaib 
46850Sstevel@tonic-gate 	/* Add an IP header */
46860Sstevel@tonic-gate 	ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len;
46878348SEric.Yu@Sun.COM 	ipha = (ipha_t *)&mp->b_rptr[-ip_hdr_length];
46888348SEric.Yu@Sun.COM 	if ((uchar_t *)ipha < mp->b_datap->db_base ||
46898348SEric.Yu@Sun.COM 	    mp->b_datap->db_ref != 1 ||
46900Sstevel@tonic-gate 	    !OK_32PTR(ipha)) {
46918348SEric.Yu@Sun.COM 		mblk_t	*mp1;
46923448Sdh155122 		if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra,
46930Sstevel@tonic-gate 		    BPRI_LO))) {
46945240Snordmark 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
46958348SEric.Yu@Sun.COM 			return (ENOMEM);
46960Sstevel@tonic-gate 		}
46978348SEric.Yu@Sun.COM 		mp1->b_cont = mp;
46980Sstevel@tonic-gate 		ipha = (ipha_t *)mp1->b_datap->db_lim;
46990Sstevel@tonic-gate 		mp1->b_wptr = (uchar_t *)ipha;
47000Sstevel@tonic-gate 		ipha = (ipha_t *)((uchar_t *)ipha - ip_hdr_length);
47018348SEric.Yu@Sun.COM 		mp = mp1;
47020Sstevel@tonic-gate 	}
47030Sstevel@tonic-gate #ifdef	_BIG_ENDIAN
47040Sstevel@tonic-gate 	/* Set version, header length, and tos */
47050Sstevel@tonic-gate 	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
47060Sstevel@tonic-gate 	    ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) |
47075240Snordmark 	    icmp->icmp_type_of_service);
47080Sstevel@tonic-gate 	/* Set ttl and protocol */
47090Sstevel@tonic-gate 	*(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_ttl << 8) | icmp->icmp_proto;
47100Sstevel@tonic-gate #else
47110Sstevel@tonic-gate 	/* Set version, header length, and tos */
47120Sstevel@tonic-gate 	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
47130Sstevel@tonic-gate 	    ((icmp->icmp_type_of_service << 8) |
47145240Snordmark 	    ((IP_VERSION << 4) | (ip_hdr_length>>2)));
47150Sstevel@tonic-gate 	/* Set ttl and protocol */
47160Sstevel@tonic-gate 	*(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_proto << 8) | icmp->icmp_ttl;
47170Sstevel@tonic-gate #endif
47183318Srshoaib 	if (pktinfop->ip4_addr != INADDR_ANY) {
47193318Srshoaib 		ipha->ipha_src = pktinfop->ip4_addr;
47203318Srshoaib 		optinfo.ip_opt_flags = IP_VERIFY_SRC;
47213318Srshoaib 	} else {
47223318Srshoaib 
47233318Srshoaib 		/*
47243318Srshoaib 		 * Copy our address into the packet.  If this is zero,
47253318Srshoaib 		 * ip will fill in the real source address.
47263318Srshoaib 		 */
47273318Srshoaib 		IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, ipha->ipha_src);
47283318Srshoaib 	}
47293318Srshoaib 
47300Sstevel@tonic-gate 	ipha->ipha_fragment_offset_and_flags = 0;
47310Sstevel@tonic-gate 
47323318Srshoaib 	if (pktinfop->ip4_ill_index != 0) {
47333318Srshoaib 		optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index;
47343318Srshoaib 	}
47353318Srshoaib 
47363318Srshoaib 
47370Sstevel@tonic-gate 	/*
47380Sstevel@tonic-gate 	 * For the socket of SOCK_RAW type, the checksum is provided in the
47390Sstevel@tonic-gate 	 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to
47400Sstevel@tonic-gate 	 * tell IP that the application has sent a complete IP header and not
47410Sstevel@tonic-gate 	 * to compute the transport checksum nor change the DF flag.
47420Sstevel@tonic-gate 	 */
47430Sstevel@tonic-gate 	ipha->ipha_ident = IP_HDR_INCLUDED;
47440Sstevel@tonic-gate 
47450Sstevel@tonic-gate 	/* Finish common formatting of the packet. */
47468348SEric.Yu@Sun.COM 	mp->b_rptr = (uchar_t *)ipha;
47478348SEric.Yu@Sun.COM 
47488348SEric.Yu@Sun.COM 	ip_len = mp->b_wptr - (uchar_t *)ipha;
47498348SEric.Yu@Sun.COM 	if (mp->b_cont != NULL)
47508348SEric.Yu@Sun.COM 		ip_len += msgdsize(mp->b_cont);
47510Sstevel@tonic-gate 
47520Sstevel@tonic-gate 	/*
47530Sstevel@tonic-gate 	 * Set the length into the IP header.
47540Sstevel@tonic-gate 	 * If the length is greater than the maximum allowed by IP,
47550Sstevel@tonic-gate 	 * then free the message and return. Do not try and send it
47560Sstevel@tonic-gate 	 * as this can cause problems in layers below.
47570Sstevel@tonic-gate 	 */
47580Sstevel@tonic-gate 	if (ip_len > IP_MAXPACKET) {
47595240Snordmark 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
47608348SEric.Yu@Sun.COM 		return (EMSGSIZE);
47610Sstevel@tonic-gate 	}
47620Sstevel@tonic-gate 	ipha->ipha_length = htons((uint16_t)ip_len);
47630Sstevel@tonic-gate 	/*
47648348SEric.Yu@Sun.COM 	 * Copy in the destination address request
47650Sstevel@tonic-gate 	 */
47661676Sjpk 	ipha->ipha_dst = v4dst;
47670Sstevel@tonic-gate 
47680Sstevel@tonic-gate 	/*
47690Sstevel@tonic-gate 	 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic.
47700Sstevel@tonic-gate 	 */
47710Sstevel@tonic-gate 	if (CLASSD(v4dst))
47720Sstevel@tonic-gate 		ipha->ipha_ttl = icmp->icmp_multicast_ttl;
47730Sstevel@tonic-gate 
47740Sstevel@tonic-gate 	/* Copy in options if any */
47750Sstevel@tonic-gate 	if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) {
47760Sstevel@tonic-gate 		bcopy(icmp->icmp_ip_snd_options,
47770Sstevel@tonic-gate 		    &ipha[1], icmp->icmp_ip_snd_options_len);
47780Sstevel@tonic-gate 		/*
47790Sstevel@tonic-gate 		 * Massage source route putting first source route in ipha_dst.
47800Sstevel@tonic-gate 		 * Ignore the destination in the T_unitdata_req.
47810Sstevel@tonic-gate 		 */
47825240Snordmark 		(void) ip_massage_options(ipha, is->is_netstack);
47830Sstevel@tonic-gate 	}
47843318Srshoaib 
47855240Snordmark 	BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams);
47868348SEric.Yu@Sun.COM 	ip_output_options(connp, mp, q, IP_WPUT, &optinfo);
47878348SEric.Yu@Sun.COM 	return (0);
47880Sstevel@tonic-gate }
47890Sstevel@tonic-gate 
47908348SEric.Yu@Sun.COM static int
47918348SEric.Yu@Sun.COM icmp_update_label_v6(icmp_t *icmp, mblk_t *mp, in6_addr_t *dst)
47921676Sjpk {
47931676Sjpk 	int err;
47941676Sjpk 	uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];
47955240Snordmark 	icmp_stack_t		*is = icmp->icmp_is;
4796*8778SErik.Nordmark@Sun.COM 	conn_t			*connp = icmp->icmp_connp;
4797*8778SErik.Nordmark@Sun.COM 	cred_t			*cr;
4798*8778SErik.Nordmark@Sun.COM 
4799*8778SErik.Nordmark@Sun.COM 	/*
4800*8778SErik.Nordmark@Sun.COM 	 * All Solaris components should pass a db_credp
4801*8778SErik.Nordmark@Sun.COM 	 * for this message, hence we ASSERT.
4802*8778SErik.Nordmark@Sun.COM 	 * On production kernels we return an error to be robust against
4803*8778SErik.Nordmark@Sun.COM 	 * random streams modules sitting on top of us.
4804*8778SErik.Nordmark@Sun.COM 	 */
4805*8778SErik.Nordmark@Sun.COM 	cr = msg_getcred(mp, NULL);
4806*8778SErik.Nordmark@Sun.COM 	ASSERT(cr != NULL);
4807*8778SErik.Nordmark@Sun.COM 	if (cr == NULL)
4808*8778SErik.Nordmark@Sun.COM 		return (EINVAL);
4809*8778SErik.Nordmark@Sun.COM 
4810*8778SErik.Nordmark@Sun.COM 	err = tsol_compute_label_v6(cr, dst,
48116596Skp158701 	    opt_storage, connp->conn_mac_exempt,
48125240Snordmark 	    is->is_netstack->netstack_ip);
48131676Sjpk 	if (err == 0) {
48141676Sjpk 		err = tsol_update_sticky(&icmp->icmp_sticky_ipp,
48151676Sjpk 		    &icmp->icmp_label_len_v6, opt_storage);
48161676Sjpk 	}
48171676Sjpk 	if (err != 0) {
48185240Snordmark 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
48191676Sjpk 		DTRACE_PROBE4(
48201676Sjpk 		    tx__ip__log__drop__updatelabel__icmp6,
48218348SEric.Yu@Sun.COM 		    char *, "icmp(1) failed to update options(2) on mp(3)",
48228348SEric.Yu@Sun.COM 		    icmp_t *, icmp, char *, opt_storage, mblk_t *, mp);
48238348SEric.Yu@Sun.COM 		return (err);
48241676Sjpk 	}
48251676Sjpk 
48261676Sjpk 	icmp->icmp_v6lastdst = *dst;
48278348SEric.Yu@Sun.COM 	return (0);
48281676Sjpk }
48291676Sjpk 
48300Sstevel@tonic-gate /*
48318348SEric.Yu@Sun.COM  * raw_ip_send_data_v6():
48320Sstevel@tonic-gate  * Assumes that icmp_wput did some sanity checking on the destination
48331676Sjpk  * address, but that the label may not yet be correct.
48340Sstevel@tonic-gate  */
48358348SEric.Yu@Sun.COM static int
48368348SEric.Yu@Sun.COM raw_ip_send_data_v6(queue_t *q, conn_t *connp, mblk_t *mp, sin6_t *sin6,
48378348SEric.Yu@Sun.COM     ip6_pkt_t *ipp)
48380Sstevel@tonic-gate {
48390Sstevel@tonic-gate 	ip6_t			*ip6h;
48408348SEric.Yu@Sun.COM 	ip6i_t			*ip6i;	/* mp->b_rptr even if no ip6i_t */
48410Sstevel@tonic-gate 	int			ip_hdr_len = IPV6_HDR_LEN;
48420Sstevel@tonic-gate 	size_t			ip_len;
48438348SEric.Yu@Sun.COM 	icmp_t			*icmp = connp->conn_icmp;
48443448Sdh155122 	icmp_stack_t		*is = icmp->icmp_is;
48450Sstevel@tonic-gate 	ip6_pkt_t		*tipp;
48460Sstevel@tonic-gate 	uint32_t		csum = 0;
48470Sstevel@tonic-gate 	uint_t			ignore = 0;
48480Sstevel@tonic-gate 	uint_t			option_exists = 0, is_sticky = 0;
48490Sstevel@tonic-gate 	uint8_t			*cp;
48500Sstevel@tonic-gate 	uint8_t			*nxthdr_ptr;
48511676Sjpk 	in6_addr_t		ip6_dst;
48520Sstevel@tonic-gate 
48530Sstevel@tonic-gate 	/*
48540Sstevel@tonic-gate 	 * If the local address is a mapped address return
48550Sstevel@tonic-gate 	 * an error.
48560Sstevel@tonic-gate 	 * It would be possible to send an IPv6 packet but the
48570Sstevel@tonic-gate 	 * response would never make it back to the application
48580Sstevel@tonic-gate 	 * since it is bound to a mapped address.
48590Sstevel@tonic-gate 	 */
48600Sstevel@tonic-gate 	if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) {
48615240Snordmark 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
48628348SEric.Yu@Sun.COM 		return (EADDRNOTAVAIL);
48638348SEric.Yu@Sun.COM 	}
48648348SEric.Yu@Sun.COM 
48658348SEric.Yu@Sun.COM 	ignore = ipp->ipp_sticky_ignored;
48660Sstevel@tonic-gate 	if (sin6->sin6_scope_id != 0 &&
48670Sstevel@tonic-gate 	    IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
48680Sstevel@tonic-gate 		/*
48690Sstevel@tonic-gate 		 * IPPF_SCOPE_ID is special.  It's neither a sticky
48700Sstevel@tonic-gate 		 * option nor ancillary data.  It needs to be
48710Sstevel@tonic-gate 		 * explicitly set in option_exists.
48720Sstevel@tonic-gate 		 */
48730Sstevel@tonic-gate 		option_exists |= IPPF_SCOPE_ID;
48740Sstevel@tonic-gate 	}
48750Sstevel@tonic-gate 
48761676Sjpk 	/*
48771676Sjpk 	 * Compute the destination address
48781676Sjpk 	 */
48791676Sjpk 	ip6_dst = sin6->sin6_addr;
48801676Sjpk 	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
48811676Sjpk 		ip6_dst = ipv6_loopback;
48821676Sjpk 
48831676Sjpk 	/*
48841676Sjpk 	 * If we're not going to the same destination as last time, then
48851676Sjpk 	 * recompute the label required.  This is done in a separate routine to
48861676Sjpk 	 * avoid blowing up our stack here.
48871676Sjpk 	 */
48881676Sjpk 	if (is_system_labeled() &&
48898348SEric.Yu@Sun.COM 	    !IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst)) {
48908348SEric.Yu@Sun.COM 		int error = 0;
48918348SEric.Yu@Sun.COM 
48928348SEric.Yu@Sun.COM 		error = icmp_update_label_v6(icmp, mp, &ip6_dst);
48938348SEric.Yu@Sun.COM 		if (error != 0)
48948348SEric.Yu@Sun.COM 			return (error);
48951676Sjpk 	}
48961676Sjpk 
48971676Sjpk 	/*
48981676Sjpk 	 * If there's a security label here, then we ignore any options the
48991676Sjpk 	 * user may try to set.  We keep the peer's label as a hidden sticky
49001676Sjpk 	 * option.
49011676Sjpk 	 */
49021676Sjpk 	if (icmp->icmp_label_len_v6 > 0) {
49031676Sjpk 		ignore &= ~IPPF_HOPOPTS;
49041676Sjpk 		ipp->ipp_fields &= ~IPPF_HOPOPTS;
49051676Sjpk 	}
49061676Sjpk 
49070Sstevel@tonic-gate 	if ((icmp->icmp_sticky_ipp.ipp_fields == 0) &&
49080Sstevel@tonic-gate 	    (ipp->ipp_fields == 0)) {
49090Sstevel@tonic-gate 		/* No sticky options nor ancillary data. */
49100Sstevel@tonic-gate 		goto no_options;
49110Sstevel@tonic-gate 	}
49120Sstevel@tonic-gate 
49130Sstevel@tonic-gate 	/*
49140Sstevel@tonic-gate 	 * Go through the options figuring out where each is going to
49150Sstevel@tonic-gate 	 * come from and build two masks.  The first mask indicates if
49160Sstevel@tonic-gate 	 * the option exists at all.  The second mask indicates if the
49170Sstevel@tonic-gate 	 * option is sticky or ancillary.
49180Sstevel@tonic-gate 	 */
49190Sstevel@tonic-gate 	if (!(ignore & IPPF_HOPOPTS)) {
49200Sstevel@tonic-gate 		if (ipp->ipp_fields & IPPF_HOPOPTS) {
49210Sstevel@tonic-gate 			option_exists |= IPPF_HOPOPTS;
49220Sstevel@tonic-gate 			ip_hdr_len += ipp->ipp_hopoptslen;
49230Sstevel@tonic-gate 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) {
49240Sstevel@tonic-gate 			option_exists |= IPPF_HOPOPTS;
49250Sstevel@tonic-gate 			is_sticky |= IPPF_HOPOPTS;
49260Sstevel@tonic-gate 			ip_hdr_len += icmp->icmp_sticky_ipp.ipp_hopoptslen;
49270Sstevel@tonic-gate 		}
49280Sstevel@tonic-gate 	}
49290Sstevel@tonic-gate 
49300Sstevel@tonic-gate 	if (!(ignore & IPPF_RTHDR)) {
49310Sstevel@tonic-gate 		if (ipp->ipp_fields & IPPF_RTHDR) {
49320Sstevel@tonic-gate 			option_exists |= IPPF_RTHDR;
49330Sstevel@tonic-gate 			ip_hdr_len += ipp->ipp_rthdrlen;
49340Sstevel@tonic-gate 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTHDR) {
49350Sstevel@tonic-gate 			option_exists |= IPPF_RTHDR;
49360Sstevel@tonic-gate 			is_sticky |= IPPF_RTHDR;
49370Sstevel@tonic-gate 			ip_hdr_len += icmp->icmp_sticky_ipp.ipp_rthdrlen;
49380Sstevel@tonic-gate 		}
49390Sstevel@tonic-gate 	}
49400Sstevel@tonic-gate 
49410Sstevel@tonic-gate 	if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) {
49420Sstevel@tonic-gate 		/*
49430Sstevel@tonic-gate 		 * Need to have a router header to use these.
49440Sstevel@tonic-gate 		 */
49450Sstevel@tonic-gate 		if (ipp->ipp_fields & IPPF_RTDSTOPTS) {
49460Sstevel@tonic-gate 			option_exists |= IPPF_RTDSTOPTS;
49470Sstevel@tonic-gate 			ip_hdr_len += ipp->ipp_rtdstoptslen;
49480Sstevel@tonic-gate 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) {
49490Sstevel@tonic-gate 			option_exists |= IPPF_RTDSTOPTS;
49500Sstevel@tonic-gate 			is_sticky |= IPPF_RTDSTOPTS;
49510Sstevel@tonic-gate 			ip_hdr_len +=
49520Sstevel@tonic-gate 			    icmp->icmp_sticky_ipp.ipp_rtdstoptslen;
49530Sstevel@tonic-gate 		}
49540Sstevel@tonic-gate 	}
49550Sstevel@tonic-gate 
49560Sstevel@tonic-gate 	if (!(ignore & IPPF_DSTOPTS)) {
49570Sstevel@tonic-gate 		if (ipp->ipp_fields & IPPF_DSTOPTS) {
49580Sstevel@tonic-gate 			option_exists |= IPPF_DSTOPTS;
49590Sstevel@tonic-gate 			ip_hdr_len += ipp->ipp_dstoptslen;
49600Sstevel@tonic-gate 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) {
49610Sstevel@tonic-gate 			option_exists |= IPPF_DSTOPTS;
49620Sstevel@tonic-gate 			is_sticky |= IPPF_DSTOPTS;
49630Sstevel@tonic-gate 			ip_hdr_len += icmp->icmp_sticky_ipp.ipp_dstoptslen;
49640Sstevel@tonic-gate 		}
49650Sstevel@tonic-gate 	}
49660Sstevel@tonic-gate 
49670Sstevel@tonic-gate 	if (!(ignore & IPPF_IFINDEX)) {
49680Sstevel@tonic-gate 		if (ipp->ipp_fields & IPPF_IFINDEX) {
49690Sstevel@tonic-gate 			option_exists |= IPPF_IFINDEX;
49700Sstevel@tonic-gate 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_IFINDEX) {
49710Sstevel@tonic-gate 			option_exists |= IPPF_IFINDEX;
49720Sstevel@tonic-gate 			is_sticky |= IPPF_IFINDEX;
49730Sstevel@tonic-gate 		}
49740Sstevel@tonic-gate 	}
49750Sstevel@tonic-gate 
49760Sstevel@tonic-gate 	if (!(ignore & IPPF_ADDR)) {
49770Sstevel@tonic-gate 		if (ipp->ipp_fields & IPPF_ADDR) {
49780Sstevel@tonic-gate 			option_exists |= IPPF_ADDR;
49790Sstevel@tonic-gate 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_ADDR) {
49800Sstevel@tonic-gate 			option_exists |= IPPF_ADDR;
49810Sstevel@tonic-gate 			is_sticky |= IPPF_ADDR;
49820Sstevel@tonic-gate 		}
49830Sstevel@tonic-gate 	}
49840Sstevel@tonic-gate 
49850Sstevel@tonic-gate 	if (!(ignore & IPPF_DONTFRAG)) {
49860Sstevel@tonic-gate 		if (ipp->ipp_fields & IPPF_DONTFRAG) {
49870Sstevel@tonic-gate 			option_exists |= IPPF_DONTFRAG;
49880Sstevel@tonic-gate 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) {
49890Sstevel@tonic-gate 			option_exists |= IPPF_DONTFRAG;
49900Sstevel@tonic-gate 			is_sticky |= IPPF_DONTFRAG;
49910Sstevel@tonic-gate 		}
49920Sstevel@tonic-gate 	}
49930Sstevel@tonic-gate 
49940Sstevel@tonic-gate 	if (!(ignore & IPPF_USE_MIN_MTU)) {
49950Sstevel@tonic-gate 		if (ipp->ipp_fields & IPPF_USE_MIN_MTU) {
49960Sstevel@tonic-gate 			option_exists |= IPPF_USE_MIN_MTU;
49970Sstevel@tonic-gate 		} else if (icmp->icmp_sticky_ipp.ipp_fields &
49980Sstevel@tonic-gate 		    IPPF_USE_MIN_MTU) {
49990Sstevel@tonic-gate 			option_exists |= IPPF_USE_MIN_MTU;
50000Sstevel@tonic-gate 			is_sticky |= IPPF_USE_MIN_MTU;
50010Sstevel@tonic-gate 		}
50020Sstevel@tonic-gate 	}
50030Sstevel@tonic-gate 
50040Sstevel@tonic-gate 	if (!(ignore & IPPF_NEXTHOP)) {
50050Sstevel@tonic-gate 		if (ipp->ipp_fields & IPPF_NEXTHOP) {
50060Sstevel@tonic-gate 			option_exists |= IPPF_NEXTHOP;
50070Sstevel@tonic-gate 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NEXTHOP) {
50080Sstevel@tonic-gate 			option_exists |= IPPF_NEXTHOP;
50090Sstevel@tonic-gate 			is_sticky |= IPPF_NEXTHOP;
50100Sstevel@tonic-gate 		}
50110Sstevel@tonic-gate 	}
50120Sstevel@tonic-gate 
5013679Sseb 	if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT))
5014679Sseb 		option_exists |= IPPF_HOPLIMIT;
5015679Sseb 	/* IPV6_HOPLIMIT can never be sticky */
5016679Sseb 	ASSERT(!(icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT));
5017679Sseb 
5018679Sseb 	if (!(ignore & IPPF_UNICAST_HOPS) &&
5019679Sseb 	    (icmp->icmp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) {
5020679Sseb 		option_exists |= IPPF_UNICAST_HOPS;
5021679Sseb 		is_sticky |= IPPF_UNICAST_HOPS;
5022679Sseb 	}
5023679Sseb 
5024679Sseb 	if (!(ignore & IPPF_MULTICAST_HOPS) &&
5025679Sseb 	    (icmp->icmp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) {
5026679Sseb 		option_exists |= IPPF_MULTICAST_HOPS;
5027679Sseb 		is_sticky |= IPPF_MULTICAST_HOPS;
50280Sstevel@tonic-gate 	}
50290Sstevel@tonic-gate 
50300Sstevel@tonic-gate 	if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NO_CKSUM) {
50310Sstevel@tonic-gate 		/* This is a sticky socket option only */
50320Sstevel@tonic-gate 		option_exists |= IPPF_NO_CKSUM;
50330Sstevel@tonic-gate 		is_sticky |= IPPF_NO_CKSUM;
50340Sstevel@tonic-gate 	}
50350Sstevel@tonic-gate 
50360Sstevel@tonic-gate 	if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RAW_CKSUM) {
50370Sstevel@tonic-gate 		/* This is a sticky socket option only */
50380Sstevel@tonic-gate 		option_exists |= IPPF_RAW_CKSUM;
50390Sstevel@tonic-gate 		is_sticky |= IPPF_RAW_CKSUM;
50400Sstevel@tonic-gate 	}
50410Sstevel@tonic-gate 
50420Sstevel@tonic-gate 	if (!(ignore & IPPF_TCLASS)) {
50430Sstevel@tonic-gate 		if (ipp->ipp_fields & IPPF_TCLASS) {
50440Sstevel@tonic-gate 			option_exists |= IPPF_TCLASS;
50450Sstevel@tonic-gate 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_TCLASS) {
50460Sstevel@tonic-gate 			option_exists |= IPPF_TCLASS;
50470Sstevel@tonic-gate 			is_sticky |= IPPF_TCLASS;
50480Sstevel@tonic-gate 		}
50490Sstevel@tonic-gate 	}
50500Sstevel@tonic-gate 
50510Sstevel@tonic-gate no_options:
50520Sstevel@tonic-gate 
50530Sstevel@tonic-gate 	/*
50540Sstevel@tonic-gate 	 * If any options carried in the ip6i_t were specified, we
50550Sstevel@tonic-gate 	 * need to account for the ip6i_t in the data we'll be sending
50560Sstevel@tonic-gate 	 * down.
50570Sstevel@tonic-gate 	 */
50580Sstevel@tonic-gate 	if (option_exists & IPPF_HAS_IP6I)
50590Sstevel@tonic-gate 		ip_hdr_len += sizeof (ip6i_t);
50600Sstevel@tonic-gate 
50610Sstevel@tonic-gate 	/* check/fix buffer config, setup pointers into it */
50628348SEric.Yu@Sun.COM 	ip6h = (ip6_t *)&mp->b_rptr[-ip_hdr_len];
50638348SEric.Yu@Sun.COM 	if ((mp->b_datap->db_ref != 1) ||
50648348SEric.Yu@Sun.COM 	    ((unsigned char *)ip6h < mp->b_datap->db_base) ||
50650Sstevel@tonic-gate 	    !OK_32PTR(ip6h)) {
50668348SEric.Yu@Sun.COM 		mblk_t	*mp1;
50678348SEric.Yu@Sun.COM 
50680Sstevel@tonic-gate 		/* Try to get everything in a single mblk next time */
50690Sstevel@tonic-gate 		if (ip_hdr_len > icmp->icmp_max_hdr_len) {
50700Sstevel@tonic-gate 			icmp->icmp_max_hdr_len = ip_hdr_len;
50718348SEric.Yu@Sun.COM 
50728348SEric.Yu@Sun.COM 			(void) proto_set_tx_wroff(q == NULL ? NULL:RD(q), connp,
50733448Sdh155122 			    icmp->icmp_max_hdr_len + is->is_wroff_extra);
50740Sstevel@tonic-gate 		}
50753448Sdh155122 		mp1 = allocb(ip_hdr_len + is->is_wroff_extra, BPRI_LO);
50760Sstevel@tonic-gate 		if (!mp1) {
50775240Snordmark 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
50788348SEric.Yu@Sun.COM 			return (ENOMEM);
50790Sstevel@tonic-gate 		}
50808348SEric.Yu@Sun.COM 		mp1->b_cont = mp;
50810Sstevel@tonic-gate 		mp1->b_wptr = mp1->b_datap->db_lim;
50820Sstevel@tonic-gate 		ip6h = (ip6_t *)(mp1->b_wptr - ip_hdr_len);
50838348SEric.Yu@Sun.COM 		mp = mp1;
50848348SEric.Yu@Sun.COM 	}
50858348SEric.Yu@Sun.COM 	mp->b_rptr = (unsigned char *)ip6h;
50860Sstevel@tonic-gate 	ip6i = (ip6i_t *)ip6h;
50870Sstevel@tonic-gate 
50880Sstevel@tonic-gate #define	ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &icmp->icmp_sticky_ipp : ipp)
50890Sstevel@tonic-gate 	if (option_exists & IPPF_HAS_IP6I) {
50900Sstevel@tonic-gate 		ip6h = (ip6_t *)&ip6i[1];
50910Sstevel@tonic-gate 		ip6i->ip6i_flags = 0;
50920Sstevel@tonic-gate 		ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
50930Sstevel@tonic-gate 
50940Sstevel@tonic-gate 		/* sin6_scope_id takes precedence over IPPF_IFINDEX */
50950Sstevel@tonic-gate 		if (option_exists & IPPF_SCOPE_ID) {
50960Sstevel@tonic-gate 			ip6i->ip6i_flags |= IP6I_IFINDEX;
50970Sstevel@tonic-gate 			ip6i->ip6i_ifindex = sin6->sin6_scope_id;
50980Sstevel@tonic-gate 		} else if (option_exists & IPPF_IFINDEX) {
50990Sstevel@tonic-gate 			tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX);
51000Sstevel@tonic-gate 			ASSERT(tipp->ipp_ifindex != 0);
51010Sstevel@tonic-gate 			ip6i->ip6i_flags |= IP6I_IFINDEX;
51020Sstevel@tonic-gate 			ip6i->ip6i_ifindex = tipp->ipp_ifindex;
51030Sstevel@tonic-gate 		}
51040Sstevel@tonic-gate 
51050Sstevel@tonic-gate 		if (option_exists & IPPF_RAW_CKSUM) {
51060Sstevel@tonic-gate 			ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM;
51070Sstevel@tonic-gate 			ip6i->ip6i_checksum_off = icmp->icmp_checksum_off;
51080Sstevel@tonic-gate 		}
51090Sstevel@tonic-gate 
51100Sstevel@tonic-gate 		if (option_exists & IPPF_NO_CKSUM) {
51110Sstevel@tonic-gate 			ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM;
51120Sstevel@tonic-gate 		}
51130Sstevel@tonic-gate 
51140Sstevel@tonic-gate 		if (option_exists & IPPF_ADDR) {
51150Sstevel@tonic-gate 			/*
51160Sstevel@tonic-gate 			 * Enable per-packet source address verification if
51170Sstevel@tonic-gate 			 * IPV6_PKTINFO specified the source address.
51180Sstevel@tonic-gate 			 * ip6_src is set in the transport's _wput function.
51190Sstevel@tonic-gate 			 */
51200Sstevel@tonic-gate 			ip6i->ip6i_flags |= IP6I_VERIFY_SRC;
51210Sstevel@tonic-gate 		}
51220Sstevel@tonic-gate 
51230Sstevel@tonic-gate 		if (option_exists & IPPF_DONTFRAG) {
51240Sstevel@tonic-gate 			ip6i->ip6i_flags |= IP6I_DONTFRAG;
51250Sstevel@tonic-gate 		}
51260Sstevel@tonic-gate 
51270Sstevel@tonic-gate 		if (option_exists & IPPF_USE_MIN_MTU) {
51280Sstevel@tonic-gate 			ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU(
51290Sstevel@tonic-gate 			    ip6i->ip6i_flags, ipp->ipp_use_min_mtu);
51300Sstevel@tonic-gate 		}
51310Sstevel@tonic-gate 
51320Sstevel@tonic-gate 		if (option_exists & IPPF_NEXTHOP) {
51330Sstevel@tonic-gate 			tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP);
51340Sstevel@tonic-gate 			ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop));
51350Sstevel@tonic-gate 			ip6i->ip6i_flags |= IP6I_NEXTHOP;
51360Sstevel@tonic-gate 			ip6i->ip6i_nexthop = tipp->ipp_nexthop;
51370Sstevel@tonic-gate 		}
51380Sstevel@tonic-gate 
51390Sstevel@tonic-gate 		/*
51400Sstevel@tonic-gate 		 * tell IP this is an ip6i_t private header
51410Sstevel@tonic-gate 		 */
51420Sstevel@tonic-gate 		ip6i->ip6i_nxt = IPPROTO_RAW;
51430Sstevel@tonic-gate 	}
51440Sstevel@tonic-gate 
51450Sstevel@tonic-gate 	/* Initialize IPv6 header */
51460Sstevel@tonic-gate 	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
51470Sstevel@tonic-gate 	bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src));
51480Sstevel@tonic-gate 
5149679Sseb 	/* Set the hoplimit of the outgoing packet. */
51500Sstevel@tonic-gate 	if (option_exists & IPPF_HOPLIMIT) {
5151679Sseb 		/* IPV6_HOPLIMIT ancillary data overrides all other settings. */
5152679Sseb 		ip6h->ip6_hops = ipp->ipp_hoplimit;
5153679Sseb 		ip6i->ip6i_flags |= IP6I_HOPLIMIT;
5154679Sseb 	} else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
51550Sstevel@tonic-gate 		ip6h->ip6_hops = icmp->icmp_multicast_ttl;
5156679Sseb 		if (option_exists & IPPF_MULTICAST_HOPS)
5157679Sseb 			ip6i->ip6i_flags |= IP6I_HOPLIMIT;
51580Sstevel@tonic-gate 	} else {
51590Sstevel@tonic-gate 		ip6h->ip6_hops = icmp->icmp_ttl;
5160679Sseb 		if (option_exists & IPPF_UNICAST_HOPS)
5161679Sseb 			ip6i->ip6i_flags |= IP6I_HOPLIMIT;
51620Sstevel@tonic-gate 	}
51630Sstevel@tonic-gate 
51640Sstevel@tonic-gate 	if (option_exists & IPPF_ADDR) {
51650Sstevel@tonic-gate 		tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR);
51660Sstevel@tonic-gate 		ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr));
51670Sstevel@tonic-gate 		ip6h->ip6_src = tipp->ipp_addr;
51680Sstevel@tonic-gate 	} else {
51690Sstevel@tonic-gate 		/*
51700Sstevel@tonic-gate 		 * The source address was not set using IPV6_PKTINFO.
51710Sstevel@tonic-gate 		 * First look at the bound source.
51720Sstevel@tonic-gate 		 * If unspecified fallback to __sin6_src_id.
51730Sstevel@tonic-gate 		 */
51740Sstevel@tonic-gate 		ip6h->ip6_src = icmp->icmp_v6src;
51750Sstevel@tonic-gate 		if (sin6->__sin6_src_id != 0 &&
51760Sstevel@tonic-gate 		    IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
51770Sstevel@tonic-gate 			ip_srcid_find_id(sin6->__sin6_src_id,
51783448Sdh155122 			    &ip6h->ip6_src, icmp->icmp_zoneid,
51793448Sdh155122 			    is->is_netstack);
51800Sstevel@tonic-gate 		}
51810Sstevel@tonic-gate 	}
51820Sstevel@tonic-gate 
51830Sstevel@tonic-gate 	nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt;
51840Sstevel@tonic-gate 	cp = (uint8_t *)&ip6h[1];
51850Sstevel@tonic-gate 
51860Sstevel@tonic-gate 	/*
51870Sstevel@tonic-gate 	 * Here's where we have to start stringing together
51880Sstevel@tonic-gate 	 * any extension headers in the right order:
51890Sstevel@tonic-gate 	 * Hop-by-hop, destination, routing, and final destination opts.
51900Sstevel@tonic-gate 	 */
51910Sstevel@tonic-gate 	if (option_exists & IPPF_HOPOPTS) {
51920Sstevel@tonic-gate 		/* Hop-by-hop options */
51930Sstevel@tonic-gate 		ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
51940Sstevel@tonic-gate 		tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS);
51950Sstevel@tonic-gate 
51960Sstevel@tonic-gate 		*nxthdr_ptr = IPPROTO_HOPOPTS;
51970Sstevel@tonic-gate 		nxthdr_ptr = &hbh->ip6h_nxt;
51980Sstevel@tonic-gate 
51990Sstevel@tonic-gate 		bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen);
52000Sstevel@tonic-gate 		cp += tipp->ipp_hopoptslen;
52010Sstevel@tonic-gate 	}
52020Sstevel@tonic-gate 	/*
52030Sstevel@tonic-gate 	 * En-route destination options
52040Sstevel@tonic-gate 	 * Only do them if there's a routing header as well
52050Sstevel@tonic-gate 	 */
52060Sstevel@tonic-gate 	if (option_exists & IPPF_RTDSTOPTS) {
52070Sstevel@tonic-gate 		ip6_dest_t *dst = (ip6_dest_t *)cp;
52080Sstevel@tonic-gate 		tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS);
52090Sstevel@tonic-gate 
52100Sstevel@tonic-gate 		*nxthdr_ptr = IPPROTO_DSTOPTS;
52110Sstevel@tonic-gate 		nxthdr_ptr = &dst->ip6d_nxt;
52120Sstevel@tonic-gate 
52130Sstevel@tonic-gate 		bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen);
52140Sstevel@tonic-gate 		cp += tipp->ipp_rtdstoptslen;
52150Sstevel@tonic-gate 	}
52160Sstevel@tonic-gate 	/*
52170Sstevel@tonic-gate 	 * Routing header next
52180Sstevel@tonic-gate 	 */
52190Sstevel@tonic-gate 	if (option_exists & IPPF_RTHDR) {
52200Sstevel@tonic-gate 		ip6_rthdr_t *rt = (ip6_rthdr_t *)cp;
52210Sstevel@tonic-gate 		tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR);
52220Sstevel@tonic-gate 
52230Sstevel@tonic-gate 		*nxthdr_ptr = IPPROTO_ROUTING;
52240Sstevel@tonic-gate 		nxthdr_ptr = &rt->ip6r_nxt;
52250Sstevel@tonic-gate 
52260Sstevel@tonic-gate 		bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen);
52270Sstevel@tonic-gate 		cp += tipp->ipp_rthdrlen;
52280Sstevel@tonic-gate 	}
52290Sstevel@tonic-gate 	/*
52300Sstevel@tonic-gate 	 * Do ultimate destination options
52310Sstevel@tonic-gate 	 */
52320Sstevel@tonic-gate 	if (option_exists & IPPF_DSTOPTS) {
52330Sstevel@tonic-gate 		ip6_dest_t *dest = (ip6_dest_t *)cp;
52340Sstevel@tonic-gate 		tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS);
52350Sstevel@tonic-gate 
52360Sstevel@tonic-gate 		*nxthdr_ptr = IPPROTO_DSTOPTS;
52370Sstevel@tonic-gate 		nxthdr_ptr = &dest->ip6d_nxt;
52380Sstevel@tonic-gate 
52390Sstevel@tonic-gate 		bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen);
52400Sstevel@tonic-gate 		cp += tipp->ipp_dstoptslen;
52410Sstevel@tonic-gate 	}
52420Sstevel@tonic-gate 
52430Sstevel@tonic-gate 	/*
52440Sstevel@tonic-gate 	 * Now set the last header pointer to the proto passed in
52450Sstevel@tonic-gate 	 */
52460Sstevel@tonic-gate 	ASSERT((int)(cp - (uint8_t *)ip6i) == ip_hdr_len);
52470Sstevel@tonic-gate 	*nxthdr_ptr = icmp->icmp_proto;
52480Sstevel@tonic-gate 
52490Sstevel@tonic-gate 	/*
52500Sstevel@tonic-gate 	 * Copy in the destination address
52510Sstevel@tonic-gate 	 */
52521676Sjpk 	ip6h->ip6_dst = ip6_dst;
52530Sstevel@tonic-gate 
52540Sstevel@tonic-gate 	ip6h->ip6_vcf =
52555240Snordmark 	    (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
52565240Snordmark 	    (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
52570Sstevel@tonic-gate 
52580Sstevel@tonic-gate 	if (option_exists & IPPF_TCLASS) {
52590Sstevel@tonic-gate 		tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS);
52600Sstevel@tonic-gate 		ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
52610Sstevel@tonic-gate 		    tipp->ipp_tclass);
52620Sstevel@tonic-gate 	}
52630Sstevel@tonic-gate 	if (option_exists & IPPF_RTHDR) {
52640Sstevel@tonic-gate 		ip6_rthdr_t	*rth;
52650Sstevel@tonic-gate 
52660Sstevel@tonic-gate 		/*
52670Sstevel@tonic-gate 		 * Perform any processing needed for source routing.
52680Sstevel@tonic-gate 		 * We know that all extension headers will be in the same mblk
52690Sstevel@tonic-gate 		 * as the IPv6 header.
52700Sstevel@tonic-gate 		 */
52718348SEric.Yu@Sun.COM 		rth = ip_find_rthdr_v6(ip6h, mp->b_wptr);
52720Sstevel@tonic-gate 		if (rth != NULL && rth->ip6r_segleft != 0) {
52730Sstevel@tonic-gate 			if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) {
52740Sstevel@tonic-gate 				/*
52750Sstevel@tonic-gate 				 * Drop packet - only support Type 0 routing.
52760Sstevel@tonic-gate 				 * Notify the application as well.
52770Sstevel@tonic-gate 				 */
52785240Snordmark 				BUMP_MIB(&is->is_rawip_mib,
52793448Sdh155122 				    rawipOutErrors);
52808348SEric.Yu@Sun.COM 				return (EPROTO);
52810Sstevel@tonic-gate 			}
52820Sstevel@tonic-gate 			/*
52830Sstevel@tonic-gate 			 * rth->ip6r_len is twice the number of
52840Sstevel@tonic-gate 			 * addresses in the header
52850Sstevel@tonic-gate 			 */
52860Sstevel@tonic-gate 			if (rth->ip6r_len & 0x1) {
52875240Snordmark 				BUMP_MIB(&is->is_rawip_mib,
52883448Sdh155122 				    rawipOutErrors);
52898348SEric.Yu@Sun.COM 				return (EPROTO);
52900Sstevel@tonic-gate 			}
52910Sstevel@tonic-gate 			/*
52920Sstevel@tonic-gate 			 * Shuffle the routing header and ip6_dst
52930Sstevel@tonic-gate 			 * addresses, and get the checksum difference
52940Sstevel@tonic-gate 			 * between the first hop (in ip6_dst) and
52950Sstevel@tonic-gate 			 * the destination (in the last routing hdr entry).
52960Sstevel@tonic-gate 			 */
52973448Sdh155122 			csum = ip_massage_options_v6(ip6h, rth,
52985240Snordmark 			    is->is_netstack);
52990Sstevel@tonic-gate 			/*
53000Sstevel@tonic-gate 			 * Verify that the first hop isn't a mapped address.
53010Sstevel@tonic-gate 			 * Routers along the path need to do this verification
53020Sstevel@tonic-gate 			 * for subsequent hops.
53030Sstevel@tonic-gate 			 */
53040Sstevel@tonic-gate 			if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
53055240Snordmark 				BUMP_MIB(&is->is_rawip_mib,
53063448Sdh155122 				    rawipOutErrors);
53078348SEric.Yu@Sun.COM 				return (EADDRNOTAVAIL);
53080Sstevel@tonic-gate 			}
53090Sstevel@tonic-gate 		}
53100Sstevel@tonic-gate 	}
53110Sstevel@tonic-gate 
53128348SEric.Yu@Sun.COM 	ip_len = mp->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN;
53138348SEric.Yu@Sun.COM 	if (mp->b_cont != NULL)
53148348SEric.Yu@Sun.COM 		ip_len += msgdsize(mp->b_cont);
53150Sstevel@tonic-gate 
53160Sstevel@tonic-gate 	/*
53170Sstevel@tonic-gate 	 * Set the length into the IP header.
53180Sstevel@tonic-gate 	 * If the length is greater than the maximum allowed by IP,
53190Sstevel@tonic-gate 	 * then free the message and return. Do not try and send it
53200Sstevel@tonic-gate 	 * as this can cause problems in layers below.
53210Sstevel@tonic-gate 	 */
53220Sstevel@tonic-gate 	if (ip_len > IP_MAXPACKET) {
53235240Snordmark 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
53248348SEric.Yu@Sun.COM 		return (EMSGSIZE);
53250Sstevel@tonic-gate 	}
53260Sstevel@tonic-gate 	if (icmp->icmp_proto == IPPROTO_ICMPV6 || icmp->icmp_raw_checksum) {
53278348SEric.Yu@Sun.COM 		uint_t	cksum_off;	/* From ip6i == mp->b_rptr */
53280Sstevel@tonic-gate 		uint16_t *cksum_ptr;
53290Sstevel@tonic-gate 		uint_t	ext_hdrs_len;
53300Sstevel@tonic-gate 
53310Sstevel@tonic-gate 		/* ICMPv6 must have an offset matching icmp6_cksum offset */
53320Sstevel@tonic-gate 		ASSERT(icmp->icmp_proto != IPPROTO_ICMPV6 ||
53330Sstevel@tonic-gate 		    icmp->icmp_checksum_off == 2);
53340Sstevel@tonic-gate 
53350Sstevel@tonic-gate 		/*
53360Sstevel@tonic-gate 		 * We make it easy for IP to include our pseudo header
53370Sstevel@tonic-gate 		 * by putting our length in uh_checksum, modified (if
53380Sstevel@tonic-gate 		 * we have a routing header) by the checksum difference
53390Sstevel@tonic-gate 		 * between the ultimate destination and first hop addresses.
53400Sstevel@tonic-gate 		 * Note: ICMPv6 must always checksum the packet.
53410Sstevel@tonic-gate 		 */
53420Sstevel@tonic-gate 		cksum_off = ip_hdr_len + icmp->icmp_checksum_off;
53438348SEric.Yu@Sun.COM 		if (cksum_off + sizeof (uint16_t) > mp->b_wptr - mp->b_rptr) {
53448348SEric.Yu@Sun.COM 			if (!pullupmsg(mp, cksum_off + sizeof (uint16_t))) {
53455240Snordmark 				BUMP_MIB(&is->is_rawip_mib,
53463448Sdh155122 				    rawipOutErrors);
53470Sstevel@tonic-gate 				freemsg(mp);
53488348SEric.Yu@Sun.COM 				return (0);
53490Sstevel@tonic-gate 			}
53508348SEric.Yu@Sun.COM 			ip6i = (ip6i_t *)mp->b_rptr;
53510Sstevel@tonic-gate 			if (ip6i->ip6i_nxt == IPPROTO_RAW)
53520Sstevel@tonic-gate 				ip6h = (ip6_t *)&ip6i[1];
53530Sstevel@tonic-gate 			else
53540Sstevel@tonic-gate 				ip6h = (ip6_t *)ip6i;
53550Sstevel@tonic-gate 		}
53560Sstevel@tonic-gate 		/* Add payload length to checksum */
53570Sstevel@tonic-gate 		ext_hdrs_len = ip_hdr_len - IPV6_HDR_LEN -
53580Sstevel@tonic-gate 		    (int)((uchar_t *)ip6h - (uchar_t *)ip6i);
53590Sstevel@tonic-gate 		csum += htons(ip_len - ext_hdrs_len);
53600Sstevel@tonic-gate 
53610Sstevel@tonic-gate 		cksum_ptr = (uint16_t *)((uchar_t *)ip6i + cksum_off);
53620Sstevel@tonic-gate 		csum = (csum & 0xFFFF) + (csum >> 16);
53630Sstevel@tonic-gate 		*cksum_ptr = (uint16_t)csum;
53640Sstevel@tonic-gate 	}
53650Sstevel@tonic-gate 
53660Sstevel@tonic-gate #ifdef _LITTLE_ENDIAN
53670Sstevel@tonic-gate 	ip_len = htons(ip_len);
53680Sstevel@tonic-gate #endif
53690Sstevel@tonic-gate 	ip6h->ip6_plen = (uint16_t)ip_len;
53700Sstevel@tonic-gate 
53710Sstevel@tonic-gate 	/* We're done. Pass the packet to IP */
53725240Snordmark 	BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams);
53738348SEric.Yu@Sun.COM 	ip_output_v6(icmp->icmp_connp, mp, q, IP_WPUT);
53748348SEric.Yu@Sun.COM 	return (0);
53750Sstevel@tonic-gate }
53760Sstevel@tonic-gate 
53770Sstevel@tonic-gate static void
53780Sstevel@tonic-gate icmp_wput_other(queue_t *q, mblk_t *mp)
53790Sstevel@tonic-gate {
53800Sstevel@tonic-gate 	uchar_t	*rptr = mp->b_rptr;
53810Sstevel@tonic-gate 	struct iocblk *iocp;
53820Sstevel@tonic-gate #define	tudr ((struct T_unitdata_req *)rptr)
53835240Snordmark 	conn_t	*connp = Q_TO_CONN(q);
53845240Snordmark 	icmp_t	*icmp = connp->conn_icmp;
53855240Snordmark 	icmp_stack_t *is = icmp->icmp_is;
53860Sstevel@tonic-gate 	cred_t *cr;
53870Sstevel@tonic-gate 
53880Sstevel@tonic-gate 	switch (mp->b_datap->db_type) {
53890Sstevel@tonic-gate 	case M_PROTO:
53900Sstevel@tonic-gate 	case M_PCPROTO:
53910Sstevel@tonic-gate 		if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
53920Sstevel@tonic-gate 			/*
53930Sstevel@tonic-gate 			 * If the message does not contain a PRIM_type,
53940Sstevel@tonic-gate 			 * throw it away.
53950Sstevel@tonic-gate 			 */
53960Sstevel@tonic-gate 			freemsg(mp);
53970Sstevel@tonic-gate 			return;
53980Sstevel@tonic-gate 		}
53990Sstevel@tonic-gate 		switch (((union T_primitives *)rptr)->type) {
54000Sstevel@tonic-gate 		case T_ADDR_REQ:
54010Sstevel@tonic-gate 			icmp_addr_req(q, mp);
54020Sstevel@tonic-gate 			return;
54030Sstevel@tonic-gate 		case O_T_BIND_REQ:
54040Sstevel@tonic-gate 		case T_BIND_REQ:
54058348SEric.Yu@Sun.COM 			icmp_tpi_bind(q, mp);
54060Sstevel@tonic-gate 			return;
54070Sstevel@tonic-gate 		case T_CONN_REQ:
54088348SEric.Yu@Sun.COM 			icmp_tpi_connect(q, mp);
54090Sstevel@tonic-gate 			return;
54100Sstevel@tonic-gate 		case T_CAPABILITY_REQ:
54110Sstevel@tonic-gate 			icmp_capability_req(q, mp);
54120Sstevel@tonic-gate 			return;
54130Sstevel@tonic-gate 		case T_INFO_REQ:
54140Sstevel@tonic-gate 			icmp_info_req(q, mp);
54150Sstevel@tonic-gate 			return;
54160Sstevel@tonic-gate 		case T_UNITDATA_REQ:
54170Sstevel@tonic-gate 			/*
54180Sstevel@tonic-gate 			 * If a T_UNITDATA_REQ gets here, the address must
54190Sstevel@tonic-gate 			 * be bad.  Valid T_UNITDATA_REQs are found above
54200Sstevel@tonic-gate 			 * and break to below this switch.
54210Sstevel@tonic-gate 			 */
54220Sstevel@tonic-gate 			icmp_ud_err(q, mp, EADDRNOTAVAIL);
54230Sstevel@tonic-gate 			return;
54240Sstevel@tonic-gate 		case T_UNBIND_REQ:
54258348SEric.Yu@Sun.COM 			icmp_tpi_unbind(q, mp);
54260Sstevel@tonic-gate 			return;
54270Sstevel@tonic-gate 
54280Sstevel@tonic-gate 		case T_SVR4_OPTMGMT_REQ:
5429*8778SErik.Nordmark@Sun.COM 			/*
5430*8778SErik.Nordmark@Sun.COM 			 * All Solaris components should pass a db_credp
5431*8778SErik.Nordmark@Sun.COM 			 * for this TPI message, hence we ASSERT.
5432*8778SErik.Nordmark@Sun.COM 			 * But in case there is some other M_PROTO that looks
5433*8778SErik.Nordmark@Sun.COM 			 * like a TPI message sent by some other kernel
5434*8778SErik.Nordmark@Sun.COM 			 * component, we check and return an error.
5435*8778SErik.Nordmark@Sun.COM 			 */
5436*8778SErik.Nordmark@Sun.COM 			cr = msg_getcred(mp, NULL);
5437*8778SErik.Nordmark@Sun.COM 			ASSERT(cr != NULL);
5438*8778SErik.Nordmark@Sun.COM 			if (cr == NULL) {
5439*8778SErik.Nordmark@Sun.COM 				icmp_err_ack(q, mp, TSYSERR, EINVAL);
5440*8778SErik.Nordmark@Sun.COM 				return;
5441*8778SErik.Nordmark@Sun.COM 			}
5442*8778SErik.Nordmark@Sun.COM 
54435240Snordmark 			if (!snmpcom_req(q, mp, icmp_snmp_set, ip_snmp_get,
54445240Snordmark 			    cr)) {
54450Sstevel@tonic-gate 				/* Only IP can return anything meaningful */
54460Sstevel@tonic-gate 				(void) svr4_optcom_req(q, mp, cr,
54475240Snordmark 				    &icmp_opt_obj, B_TRUE);
54485240Snordmark 			}
54490Sstevel@tonic-gate 			return;
54500Sstevel@tonic-gate 
54510Sstevel@tonic-gate 		case T_OPTMGMT_REQ:
5452*8778SErik.Nordmark@Sun.COM 			/*
5453*8778SErik.Nordmark@Sun.COM 			 * All Solaris components should pass a db_credp
5454*8778SErik.Nordmark@Sun.COM 			 * for this TPI message, hence we ASSERT.
5455*8778SErik.Nordmark@Sun.COM 			 * But in case there is some other M_PROTO that looks
5456*8778SErik.Nordmark@Sun.COM 			 * like a TPI message sent by some other kernel
5457*8778SErik.Nordmark@Sun.COM 			 * component, we check and return an error.
5458*8778SErik.Nordmark@Sun.COM 			 */
5459*8778SErik.Nordmark@Sun.COM 			cr = msg_getcred(mp, NULL);
5460*8778SErik.Nordmark@Sun.COM 			ASSERT(cr != NULL);
5461*8778SErik.Nordmark@Sun.COM 			if (cr == NULL) {
5462*8778SErik.Nordmark@Sun.COM 				icmp_err_ack(q, mp, TSYSERR, EINVAL);
5463*8778SErik.Nordmark@Sun.COM 				return;
5464*8778SErik.Nordmark@Sun.COM 			}
54650Sstevel@tonic-gate 			/* Only IP can return anything meaningful */
54665240Snordmark 			(void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj, B_TRUE);
54670Sstevel@tonic-gate 			return;
54680Sstevel@tonic-gate 
54690Sstevel@tonic-gate 		case T_DISCON_REQ:
54708348SEric.Yu@Sun.COM 			icmp_tpi_disconnect(q, mp);
54710Sstevel@tonic-gate 			return;
54720Sstevel@tonic-gate 
54730Sstevel@tonic-gate 		/* The following TPI message is not supported by icmp. */
54740Sstevel@tonic-gate 		case O_T_CONN_RES:
54750Sstevel@tonic-gate 		case T_CONN_RES:
54760Sstevel@tonic-gate 			icmp_err_ack(q, mp, TNOTSUPPORT, 0);
54770Sstevel@tonic-gate 			return;
54780Sstevel@tonic-gate 
54790Sstevel@tonic-gate 		/* The following 3 TPI requests are illegal for icmp. */
54800Sstevel@tonic-gate 		case T_DATA_REQ:
54810Sstevel@tonic-gate 		case T_EXDATA_REQ:
54820Sstevel@tonic-gate 		case T_ORDREL_REQ:
54830Sstevel@tonic-gate 			freemsg(mp);
54840Sstevel@tonic-gate 			(void) putctl1(RD(q), M_ERROR, EPROTO);
54850Sstevel@tonic-gate 			return;
54860Sstevel@tonic-gate 		default:
54870Sstevel@tonic-gate 			break;
54880Sstevel@tonic-gate 		}
54890Sstevel@tonic-gate 		break;
54900Sstevel@tonic-gate 	case M_IOCTL:
54910Sstevel@tonic-gate 		iocp = (struct iocblk *)mp->b_rptr;
54920Sstevel@tonic-gate 		switch (iocp->ioc_cmd) {
54930Sstevel@tonic-gate 		case TI_GETPEERNAME:
54940Sstevel@tonic-gate 			if (icmp->icmp_state != TS_DATA_XFER) {
54950Sstevel@tonic-gate 				/*
54960Sstevel@tonic-gate 				 * If a default destination address has not
54970Sstevel@tonic-gate 				 * been associated with the stream, then we
54980Sstevel@tonic-gate 				 * don't know the peer's name.
54990Sstevel@tonic-gate 				 */
55000Sstevel@tonic-gate 				iocp->ioc_error = ENOTCONN;
55015240Snordmark 		err_ret:;
55020Sstevel@tonic-gate 				iocp->ioc_count = 0;
55030Sstevel@tonic-gate 				mp->b_datap->db_type = M_IOCACK;
55040Sstevel@tonic-gate 				qreply(q, mp);
55050Sstevel@tonic-gate 				return;
55060Sstevel@tonic-gate 			}
55070Sstevel@tonic-gate 			/* FALLTHRU */
55080Sstevel@tonic-gate 		case TI_GETMYNAME:
55090Sstevel@tonic-gate 			/*
55100Sstevel@tonic-gate 			 * For TI_GETPEERNAME and TI_GETMYNAME, we first
55110Sstevel@tonic-gate 			 * need to copyin the user's strbuf structure.
55120Sstevel@tonic-gate 			 * Processing will continue in the M_IOCDATA case
55130Sstevel@tonic-gate 			 * below.
55140Sstevel@tonic-gate 			 */
55150Sstevel@tonic-gate 			mi_copyin(q, mp, NULL,
55160Sstevel@tonic-gate 			    SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
55170Sstevel@tonic-gate 			return;
55180Sstevel@tonic-gate 		case ND_SET:
55190Sstevel@tonic-gate 			/* nd_getset performs the necessary error checking */
55200Sstevel@tonic-gate 		case ND_GET:
55215240Snordmark 			if (nd_getset(q, is->is_nd, mp)) {
55220Sstevel@tonic-gate 				qreply(q, mp);
55230Sstevel@tonic-gate 				return;
55240Sstevel@tonic-gate 			}
55250Sstevel@tonic-gate 			break;
55268348SEric.Yu@Sun.COM 		case _SIOCSOCKFALLBACK:
55278348SEric.Yu@Sun.COM 			/*
55288348SEric.Yu@Sun.COM 			 * socket is falling back to be a
55298348SEric.Yu@Sun.COM 			 * streams socket. Nothing  to do
55308348SEric.Yu@Sun.COM 			 */
55318348SEric.Yu@Sun.COM 			iocp->ioc_count = 0;
55328348SEric.Yu@Sun.COM 			iocp->ioc_rval = 0;
55338348SEric.Yu@Sun.COM 			qreply(q, mp);
55348348SEric.Yu@Sun.COM 			return;
55350Sstevel@tonic-gate 		default:
55360Sstevel@tonic-gate 			break;
55370Sstevel@tonic-gate 		}
55380Sstevel@tonic-gate 		break;
55390Sstevel@tonic-gate 	case M_IOCDATA:
55400Sstevel@tonic-gate 		icmp_wput_iocdata(q, mp);
55410Sstevel@tonic-gate 		return;
55420Sstevel@tonic-gate 	default:
55430Sstevel@tonic-gate 		break;
55440Sstevel@tonic-gate 	}
55455240Snordmark 	ip_wput(q, mp);
55460Sstevel@tonic-gate }
55470Sstevel@tonic-gate 
55480Sstevel@tonic-gate /*
55490Sstevel@tonic-gate  * icmp_wput_iocdata is called by icmp_wput_slow to handle all M_IOCDATA
55500Sstevel@tonic-gate  * messages.
55510Sstevel@tonic-gate  */
55520Sstevel@tonic-gate static void
55530Sstevel@tonic-gate icmp_wput_iocdata(queue_t *q, mblk_t *mp)
55540Sstevel@tonic-gate {
55550Sstevel@tonic-gate 	mblk_t	*mp1;
55560Sstevel@tonic-gate 	STRUCT_HANDLE(strbuf, sb);
55570Sstevel@tonic-gate 	icmp_t	*icmp;
55588348SEric.Yu@Sun.COM 	uint_t	addrlen;
55598348SEric.Yu@Sun.COM 	uint_t	error;
55600Sstevel@tonic-gate 
55610Sstevel@tonic-gate 	/* Make sure it is one of ours. */
55620Sstevel@tonic-gate 	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
55630Sstevel@tonic-gate 	case TI_GETMYNAME:
55640Sstevel@tonic-gate 	case TI_GETPEERNAME:
55650Sstevel@tonic-gate 		break;
55660Sstevel@tonic-gate 	default:
55675240Snordmark 		icmp = Q_TO_ICMP(q);
55685240Snordmark 		ip_output(icmp->icmp_connp, mp, q, IP_WPUT);
55690Sstevel@tonic-gate 		return;
55700Sstevel@tonic-gate 	}
55710Sstevel@tonic-gate 	switch (mi_copy_state(q, mp, &mp1)) {
55720Sstevel@tonic-gate 	case -1:
55730Sstevel@tonic-gate 		return;
55740Sstevel@tonic-gate 	case MI_COPY_CASE(MI_COPY_IN, 1):
55750Sstevel@tonic-gate 		break;
55760Sstevel@tonic-gate 	case MI_COPY_CASE(MI_COPY_OUT, 1):
55770Sstevel@tonic-gate 		/*
55780Sstevel@tonic-gate 		 * The address has been copied out, so now
55790Sstevel@tonic-gate 		 * copyout the strbuf.
55800Sstevel@tonic-gate 		 */
55810Sstevel@tonic-gate 		mi_copyout(q, mp);
55820Sstevel@tonic-gate 		return;
55830Sstevel@tonic-gate 	case MI_COPY_CASE(MI_COPY_OUT, 2):
55840Sstevel@tonic-gate 		/*
55850Sstevel@tonic-gate 		 * The address and strbuf have been copied out.
55860Sstevel@tonic-gate 		 * We're done, so just acknowledge the original
55870Sstevel@tonic-gate 		 * M_IOCTL.
55880Sstevel@tonic-gate 		 */
55890Sstevel@tonic-gate 		mi_copy_done(q, mp, 0);
55900Sstevel@tonic-gate 		return;
55910Sstevel@tonic-gate 	default:
55920Sstevel@tonic-gate 		/*
55930Sstevel@tonic-gate 		 * Something strange has happened, so acknowledge
55940Sstevel@tonic-gate 		 * the original M_IOCTL with an EPROTO error.
55950Sstevel@tonic-gate 		 */
55960Sstevel@tonic-gate 		mi_copy_done(q, mp, EPROTO);
55970Sstevel@tonic-gate 		return;
55980Sstevel@tonic-gate 	}
55990Sstevel@tonic-gate 	/*
56000Sstevel@tonic-gate 	 * Now we have the strbuf structure for TI_GETMYNAME
56010Sstevel@tonic-gate 	 * and TI_GETPEERNAME.  Next we copyout the requested
56020Sstevel@tonic-gate 	 * address and then we'll copyout the strbuf.
56030Sstevel@tonic-gate 	 */
56040Sstevel@tonic-gate 	STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag,
56050Sstevel@tonic-gate 	    (void *)mp1->b_rptr);
56065240Snordmark 	icmp = Q_TO_ICMP(q);
56070Sstevel@tonic-gate 	if (icmp->icmp_family == AF_INET)
56080Sstevel@tonic-gate 		addrlen = sizeof (sin_t);
56090Sstevel@tonic-gate 	else
56100Sstevel@tonic-gate 		addrlen = sizeof (sin6_t);
56110Sstevel@tonic-gate 
56120Sstevel@tonic-gate 	if (STRUCT_FGET(sb, maxlen) < addrlen) {
56130Sstevel@tonic-gate 		mi_copy_done(q, mp, EINVAL);
56140Sstevel@tonic-gate 		return;
56150Sstevel@tonic-gate 	}
56168348SEric.Yu@Sun.COM 
56178348SEric.Yu@Sun.COM 	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
56188348SEric.Yu@Sun.COM 
56198348SEric.Yu@Sun.COM 	if (mp1 == NULL)
56208348SEric.Yu@Sun.COM 		return;
56218348SEric.Yu@Sun.COM 
56228348SEric.Yu@Sun.COM 	rw_enter(&icmp->icmp_rwlock, RW_READER);
56230Sstevel@tonic-gate 	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
56240Sstevel@tonic-gate 	case TI_GETMYNAME:
56258348SEric.Yu@Sun.COM 		error = rawip_do_getsockname(icmp, (void *)mp1->b_rptr,
56268348SEric.Yu@Sun.COM 		    &addrlen);
56270Sstevel@tonic-gate 		break;
56280Sstevel@tonic-gate 	case TI_GETPEERNAME:
56298348SEric.Yu@Sun.COM 		error = rawip_do_getpeername(icmp, (void *)mp1->b_rptr,
56308348SEric.Yu@Sun.COM 		    &addrlen);
56310Sstevel@tonic-gate 		break;
56328348SEric.Yu@Sun.COM 	}
56338348SEric.Yu@Sun.COM 	rw_exit(&icmp->icmp_rwlock);
56348348SEric.Yu@Sun.COM 
56358348SEric.Yu@Sun.COM 	if (error != 0) {
56368348SEric.Yu@Sun.COM 		mi_copy_done(q, mp, error);
56370Sstevel@tonic-gate 	} else {
56388348SEric.Yu@Sun.COM 		mp1->b_wptr += addrlen;
56398348SEric.Yu@Sun.COM 		STRUCT_FSET(sb, len, addrlen);
56408348SEric.Yu@Sun.COM 
56418348SEric.Yu@Sun.COM 		/* Copy out the address */
56428348SEric.Yu@Sun.COM 		mi_copyout(q, mp);
56438348SEric.Yu@Sun.COM 	}
56440Sstevel@tonic-gate }
56450Sstevel@tonic-gate 
56460Sstevel@tonic-gate static int
56470Sstevel@tonic-gate icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp,
56480Sstevel@tonic-gate     void *thisdg_attrs)
56490Sstevel@tonic-gate {
56500Sstevel@tonic-gate 	struct T_unitdata_req *udreqp;
56510Sstevel@tonic-gate 	int is_absreq_failure;
56520Sstevel@tonic-gate 	cred_t *cr;
56530Sstevel@tonic-gate 
56540Sstevel@tonic-gate 	udreqp = (struct T_unitdata_req *)mp->b_rptr;
56550Sstevel@tonic-gate 	*errorp = 0;
56560Sstevel@tonic-gate 
5657*8778SErik.Nordmark@Sun.COM 	/*
5658*8778SErik.Nordmark@Sun.COM 	 * All Solaris components should pass a db_credp
5659*8778SErik.Nordmark@Sun.COM 	 * for this TPI message, hence we ASSERT.
5660*8778SErik.Nordmark@Sun.COM 	 * But in case there is some other M_PROTO that looks
5661*8778SErik.Nordmark@Sun.COM 	 * like a TPI message sent by some other kernel
5662*8778SErik.Nordmark@Sun.COM 	 * component, we check and return an error.
5663*8778SErik.Nordmark@Sun.COM 	 */
5664*8778SErik.Nordmark@Sun.COM 	cr = msg_getcred(mp, NULL);
5665*8778SErik.Nordmark@Sun.COM 	ASSERT(cr != NULL);
5666*8778SErik.Nordmark@Sun.COM 	if (cr == NULL)
5667*8778SErik.Nordmark@Sun.COM 		return (-1);
56680Sstevel@tonic-gate 
56690Sstevel@tonic-gate 	*errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length,
56700Sstevel@tonic-gate 	    udreqp->OPT_offset, cr, &icmp_opt_obj,
56710Sstevel@tonic-gate 	    thisdg_attrs, &is_absreq_failure);
56720Sstevel@tonic-gate 
56730Sstevel@tonic-gate 	if (*errorp != 0) {
56740Sstevel@tonic-gate 		/*
56750Sstevel@tonic-gate 		 * Note: No special action needed in this
56760Sstevel@tonic-gate 		 * module for "is_absreq_failure"
56770Sstevel@tonic-gate 		 */
56780Sstevel@tonic-gate 		return (-1);		/* failure */
56790Sstevel@tonic-gate 	}
56800Sstevel@tonic-gate 	ASSERT(is_absreq_failure == 0);
56810Sstevel@tonic-gate 	return (0);	/* success */
56820Sstevel@tonic-gate }
56830Sstevel@tonic-gate 
56840Sstevel@tonic-gate void
56858348SEric.Yu@Sun.COM icmp_ddi_g_init(void)
56860Sstevel@tonic-gate {
56875381Smeem 	icmp_max_optsize = optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr,
56885240Snordmark 	    icmp_opt_obj.odb_opt_arr_cnt);
56890Sstevel@tonic-gate 
56903448Sdh155122 	/*
56913448Sdh155122 	 * We want to be informed each time a stack is created or
56923448Sdh155122 	 * destroyed in the kernel, so we can maintain the
56933448Sdh155122 	 * set of icmp_stack_t's.
56943448Sdh155122 	 */
56953448Sdh155122 	netstack_register(NS_ICMP, rawip_stack_init, NULL, rawip_stack_fini);
56960Sstevel@tonic-gate }
56970Sstevel@tonic-gate 
56980Sstevel@tonic-gate void
56998348SEric.Yu@Sun.COM icmp_ddi_g_destroy(void)
57000Sstevel@tonic-gate {
57013448Sdh155122 	netstack_unregister(NS_ICMP);
57020Sstevel@tonic-gate }
57030Sstevel@tonic-gate 
57048348SEric.Yu@Sun.COM #define	INET_NAME	"ip"
57058348SEric.Yu@Sun.COM 
57063448Sdh155122 /*
57073448Sdh155122  * Initialize the ICMP stack instance.
57083448Sdh155122  */
57093448Sdh155122 static void *
57103448Sdh155122 rawip_stack_init(netstackid_t stackid, netstack_t *ns)
57113448Sdh155122 {
57123448Sdh155122 	icmp_stack_t	*is;
57133448Sdh155122 	icmpparam_t	*pa;
57148348SEric.Yu@Sun.COM 	int		error = 0;
57158348SEric.Yu@Sun.COM 	major_t		major;
57163448Sdh155122 
57173448Sdh155122 	is = (icmp_stack_t *)kmem_zalloc(sizeof (*is), KM_SLEEP);
57183448Sdh155122 	is->is_netstack = ns;
57193448Sdh155122 
57203448Sdh155122 	pa = (icmpparam_t *)kmem_alloc(sizeof (icmp_param_arr), KM_SLEEP);
57213448Sdh155122 	is->is_param_arr = pa;
57223448Sdh155122 	bcopy(icmp_param_arr, is->is_param_arr, sizeof (icmp_param_arr));
57233448Sdh155122 
57243448Sdh155122 	(void) icmp_param_register(&is->is_nd,
57253448Sdh155122 	    is->is_param_arr, A_CNT(icmp_param_arr));
57263448Sdh155122 	is->is_ksp = rawip_kstat_init(stackid);
57278348SEric.Yu@Sun.COM 
57288348SEric.Yu@Sun.COM 	major = mod_name_to_major(INET_NAME);
57298348SEric.Yu@Sun.COM 	error = ldi_ident_from_major(major, &is->is_ldi_ident);
57308348SEric.Yu@Sun.COM 	ASSERT(error == 0);
57313448Sdh155122 	return (is);
57323448Sdh155122 }
57333448Sdh155122 
57343448Sdh155122 /*
57353448Sdh155122  * Free the ICMP stack instance.
57363448Sdh155122  */
57370Sstevel@tonic-gate static void
57383448Sdh155122 rawip_stack_fini(netstackid_t stackid, void *arg)
57393448Sdh155122 {
57403448Sdh155122 	icmp_stack_t *is = (icmp_stack_t *)arg;
57413448Sdh155122 
57423448Sdh155122 	nd_free(&is->is_nd);
57433448Sdh155122 	kmem_free(is->is_param_arr, sizeof (icmp_param_arr));
57443448Sdh155122 	is->is_param_arr = NULL;
57453448Sdh155122 
57463448Sdh155122 	rawip_kstat_fini(stackid, is->is_ksp);
57473448Sdh155122 	is->is_ksp = NULL;
57488348SEric.Yu@Sun.COM 	ldi_ident_release(is->is_ldi_ident);
57493448Sdh155122 	kmem_free(is, sizeof (*is));
57503448Sdh155122 }
57513448Sdh155122 
57523448Sdh155122 static void *
57533448Sdh155122 rawip_kstat_init(netstackid_t stackid) {
57543448Sdh155122 	kstat_t	*ksp;
57550Sstevel@tonic-gate 
57560Sstevel@tonic-gate 	rawip_named_kstat_t template = {
57570Sstevel@tonic-gate 		{ "inDatagrams",	KSTAT_DATA_UINT32, 0 },
57580Sstevel@tonic-gate 		{ "inCksumErrs",	KSTAT_DATA_UINT32, 0 },
57590Sstevel@tonic-gate 		{ "inErrors",		KSTAT_DATA_UINT32, 0 },
57600Sstevel@tonic-gate 		{ "outDatagrams",	KSTAT_DATA_UINT32, 0 },
57610Sstevel@tonic-gate 		{ "outErrors",		KSTAT_DATA_UINT32, 0 },
57620Sstevel@tonic-gate 	};
57630Sstevel@tonic-gate 
57643448Sdh155122 	ksp = kstat_create_netstack("icmp", 0, "rawip", "mib2",
57650Sstevel@tonic-gate 					KSTAT_TYPE_NAMED,
57660Sstevel@tonic-gate 					NUM_OF_FIELDS(rawip_named_kstat_t),
57673448Sdh155122 					0, stackid);
57683448Sdh155122 	if (ksp == NULL || ksp->ks_data == NULL)
57693448Sdh155122 		return (NULL);
57703448Sdh155122 
57713448Sdh155122 	bcopy(&template, ksp->ks_data, sizeof (template));
57723448Sdh155122 	ksp->ks_update = rawip_kstat_update;
57733448Sdh155122 	ksp->ks_private = (void *)(uintptr_t)stackid;
57743448Sdh155122 
57753448Sdh155122 	kstat_install(ksp);
57763448Sdh155122 	return (ksp);
57770Sstevel@tonic-gate }
57780Sstevel@tonic-gate 
57790Sstevel@tonic-gate static void
57803448Sdh155122 rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp)
57813448Sdh155122 {
57823448Sdh155122 	if (ksp != NULL) {
57833448Sdh155122 		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
57843448Sdh155122 		kstat_delete_netstack(ksp, stackid);
57850Sstevel@tonic-gate 	}
57860Sstevel@tonic-gate }
57870Sstevel@tonic-gate 
57880Sstevel@tonic-gate static int
57893448Sdh155122 rawip_kstat_update(kstat_t *ksp, int rw)
57903448Sdh155122 {
57910Sstevel@tonic-gate 	rawip_named_kstat_t *rawipkp;
57923448Sdh155122 	netstackid_t	stackid = (netstackid_t)(uintptr_t)ksp->ks_private;
57933448Sdh155122 	netstack_t	*ns;
57943448Sdh155122 	icmp_stack_t	*is;
57953448Sdh155122 
57963448Sdh155122 	if ((ksp == NULL) || (ksp->ks_data == NULL))
57970Sstevel@tonic-gate 		return (EIO);
57980Sstevel@tonic-gate 
57990Sstevel@tonic-gate 	if (rw == KSTAT_WRITE)
58000Sstevel@tonic-gate 		return (EACCES);
58010Sstevel@tonic-gate 
58023448Sdh155122 	rawipkp = (rawip_named_kstat_t *)ksp->ks_data;
58033448Sdh155122 
58043448Sdh155122 	ns = netstack_find_by_stackid(stackid);
58053448Sdh155122 	if (ns == NULL)
58063448Sdh155122 		return (-1);
58073448Sdh155122 	is = ns->netstack_icmp;
58083448Sdh155122 	if (is == NULL) {
58093448Sdh155122 		netstack_rele(ns);
58103448Sdh155122 		return (-1);
58113448Sdh155122 	}
58123448Sdh155122 	rawipkp->inDatagrams.value.ui32 =  is->is_rawip_mib.rawipInDatagrams;
58133448Sdh155122 	rawipkp->inCksumErrs.value.ui32 =  is->is_rawip_mib.rawipInCksumErrs;
58143448Sdh155122 	rawipkp->inErrors.value.ui32 =	   is->is_rawip_mib.rawipInErrors;
58153448Sdh155122 	rawipkp->outDatagrams.value.ui32 = is->is_rawip_mib.rawipOutDatagrams;
58163448Sdh155122 	rawipkp->outErrors.value.ui32 =	   is->is_rawip_mib.rawipOutErrors;
58173448Sdh155122 	netstack_rele(ns);
58180Sstevel@tonic-gate 	return (0);
58190Sstevel@tonic-gate }
58208348SEric.Yu@Sun.COM 
58218348SEric.Yu@Sun.COM /* ARGSUSED */
58228348SEric.Yu@Sun.COM int
58238348SEric.Yu@Sun.COM rawip_accept(sock_lower_handle_t lproto_handle,
58248348SEric.Yu@Sun.COM     sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
58258348SEric.Yu@Sun.COM     cred_t *cr)
58268348SEric.Yu@Sun.COM {
58278348SEric.Yu@Sun.COM 	return (EOPNOTSUPP);
58288348SEric.Yu@Sun.COM }
58298348SEric.Yu@Sun.COM 
58308348SEric.Yu@Sun.COM /* ARGSUSED */
58318348SEric.Yu@Sun.COM int
58328348SEric.Yu@Sun.COM rawip_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
58338348SEric.Yu@Sun.COM     socklen_t len, cred_t *cr)
58348348SEric.Yu@Sun.COM {
58358348SEric.Yu@Sun.COM 	conn_t  *connp = (conn_t *)proto_handle;
58368348SEric.Yu@Sun.COM 	int error;
58378348SEric.Yu@Sun.COM 
5838*8778SErik.Nordmark@Sun.COM 	/* All Solaris components should pass a cred for this operation. */
5839*8778SErik.Nordmark@Sun.COM 	ASSERT(cr != NULL);
5840*8778SErik.Nordmark@Sun.COM 
58418348SEric.Yu@Sun.COM 	/* Binding to a NULL address really means unbind */
58428348SEric.Yu@Sun.COM 	if (sa == NULL)
58438348SEric.Yu@Sun.COM 		error = rawip_do_unbind(connp);
58448348SEric.Yu@Sun.COM 	else
58458348SEric.Yu@Sun.COM 		error = rawip_do_bind(connp, sa, len);
58468348SEric.Yu@Sun.COM 
58478348SEric.Yu@Sun.COM 	if (error < 0) {
58488348SEric.Yu@Sun.COM 		if (error == -TOUTSTATE)
58498348SEric.Yu@Sun.COM 			error = EINVAL;
58508348SEric.Yu@Sun.COM 		else
58518348SEric.Yu@Sun.COM 			error = proto_tlitosyserr(-error);
58528348SEric.Yu@Sun.COM 	}
58538348SEric.Yu@Sun.COM 	return (error);
58548348SEric.Yu@Sun.COM }
58558348SEric.Yu@Sun.COM 
58568348SEric.Yu@Sun.COM static int
58578348SEric.Yu@Sun.COM rawip_implicit_bind(conn_t *connp)
58588348SEric.Yu@Sun.COM {
58598348SEric.Yu@Sun.COM 	sin6_t sin6addr;
58608348SEric.Yu@Sun.COM 	sin_t *sin;
58618348SEric.Yu@Sun.COM 	sin6_t *sin6;
58628348SEric.Yu@Sun.COM 	socklen_t len;
58638348SEric.Yu@Sun.COM 	int error;
58648348SEric.Yu@Sun.COM 
58658348SEric.Yu@Sun.COM 	if (connp->conn_icmp->icmp_family == AF_INET) {
58668348SEric.Yu@Sun.COM 		len = sizeof (struct sockaddr_in);
58678348SEric.Yu@Sun.COM 		sin = (sin_t *)&sin6addr;
58688348SEric.Yu@Sun.COM 		*sin = sin_null;
58698348SEric.Yu@Sun.COM 		sin->sin_family = AF_INET;
58708348SEric.Yu@Sun.COM 		sin->sin_addr.s_addr = INADDR_ANY;
58718348SEric.Yu@Sun.COM 	} else {
58728348SEric.Yu@Sun.COM 		ASSERT(connp->conn_icmp->icmp_family == AF_INET6);
58738348SEric.Yu@Sun.COM 		len = sizeof (sin6_t);
58748348SEric.Yu@Sun.COM 		sin6 = (sin6_t *)&sin6addr;
58758348SEric.Yu@Sun.COM 		*sin6 = sin6_null;
58768348SEric.Yu@Sun.COM 		sin6->sin6_family = AF_INET6;
58778348SEric.Yu@Sun.COM 		V6_SET_ZERO(sin6->sin6_addr);
58788348SEric.Yu@Sun.COM 	}
58798348SEric.Yu@Sun.COM 
58808348SEric.Yu@Sun.COM 	error = rawip_do_bind(connp, (struct sockaddr *)&sin6addr, len);
58818348SEric.Yu@Sun.COM 
58828348SEric.Yu@Sun.COM 	return ((error < 0) ? proto_tlitosyserr(-error) : error);
58838348SEric.Yu@Sun.COM }
58848348SEric.Yu@Sun.COM 
58858348SEric.Yu@Sun.COM static int
58868348SEric.Yu@Sun.COM rawip_unbind(conn_t *connp)
58878348SEric.Yu@Sun.COM {
58888348SEric.Yu@Sun.COM 	int error;
58898348SEric.Yu@Sun.COM 
58908348SEric.Yu@Sun.COM 	error = rawip_do_unbind(connp);
58918348SEric.Yu@Sun.COM 	if (error < 0) {
58928348SEric.Yu@Sun.COM 		error = proto_tlitosyserr(-error);
58938348SEric.Yu@Sun.COM 	}
58948348SEric.Yu@Sun.COM 	return (error);
58958348SEric.Yu@Sun.COM }
58968348SEric.Yu@Sun.COM 
58978348SEric.Yu@Sun.COM /* ARGSUSED */
58988348SEric.Yu@Sun.COM int
58998348SEric.Yu@Sun.COM rawip_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
59008348SEric.Yu@Sun.COM {
59018348SEric.Yu@Sun.COM 	return (EOPNOTSUPP);
59028348SEric.Yu@Sun.COM }
59038348SEric.Yu@Sun.COM 
59048348SEric.Yu@Sun.COM /* ARGSUSED */
59058348SEric.Yu@Sun.COM int
59068348SEric.Yu@Sun.COM rawip_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
59078348SEric.Yu@Sun.COM     socklen_t len, sock_connid_t *id, cred_t *cr)
59088348SEric.Yu@Sun.COM {
59098348SEric.Yu@Sun.COM 	conn_t	*connp = (conn_t *)proto_handle;
59108348SEric.Yu@Sun.COM 	icmp_t *icmp = connp->conn_icmp;
59118348SEric.Yu@Sun.COM 	int	error;
59128348SEric.Yu@Sun.COM 	boolean_t did_bind = B_FALSE;
59138348SEric.Yu@Sun.COM 
5914*8778SErik.Nordmark@Sun.COM 	/* All Solaris components should pass a cred for this operation. */
5915*8778SErik.Nordmark@Sun.COM 	ASSERT(cr != NULL);
5916*8778SErik.Nordmark@Sun.COM 
59178348SEric.Yu@Sun.COM 	if (sa == NULL) {
59188348SEric.Yu@Sun.COM 		/*
59198348SEric.Yu@Sun.COM 		 * Disconnect
59208348SEric.Yu@Sun.COM 		 * Make sure we are connected
59218348SEric.Yu@Sun.COM 		 */
59228348SEric.Yu@Sun.COM 		if (icmp->icmp_state != TS_DATA_XFER)
59238348SEric.Yu@Sun.COM 			return (EINVAL);
59248348SEric.Yu@Sun.COM 
59258348SEric.Yu@Sun.COM 		error = icmp_disconnect(connp);
59268348SEric.Yu@Sun.COM 		return (error);
59278348SEric.Yu@Sun.COM 	}
59288348SEric.Yu@Sun.COM 
59298348SEric.Yu@Sun.COM 	error = proto_verify_ip_addr(icmp->icmp_family, sa, len);
59308348SEric.Yu@Sun.COM 	if (error != 0)
59318348SEric.Yu@Sun.COM 		return (error);
59328348SEric.Yu@Sun.COM 
59338348SEric.Yu@Sun.COM 	/* do an implicit bind if necessary */
59348348SEric.Yu@Sun.COM 	if (icmp->icmp_state == TS_UNBND) {
59358348SEric.Yu@Sun.COM 		error = rawip_implicit_bind(connp);
59368348SEric.Yu@Sun.COM 		/*
59378348SEric.Yu@Sun.COM 		 * We could be racing with an actual bind, in which case
59388348SEric.Yu@Sun.COM 		 * we would see EPROTO. We cross our fingers and try
59398348SEric.Yu@Sun.COM 		 * to connect.
59408348SEric.Yu@Sun.COM 		 */
59418348SEric.Yu@Sun.COM 		if (!(error == 0 || error == EPROTO))
59428348SEric.Yu@Sun.COM 			return (error);
59438348SEric.Yu@Sun.COM 		did_bind = B_TRUE;
59448348SEric.Yu@Sun.COM 	}
59458348SEric.Yu@Sun.COM 
59468348SEric.Yu@Sun.COM 	/*
59478348SEric.Yu@Sun.COM 	 * set SO_DGRAM_ERRIND
59488348SEric.Yu@Sun.COM 	 */
59498348SEric.Yu@Sun.COM 	icmp->icmp_dgram_errind = B_TRUE;
59508348SEric.Yu@Sun.COM 
5951*8778SErik.Nordmark@Sun.COM 	error = rawip_do_connect(connp, sa, len, cr);
59528348SEric.Yu@Sun.COM 
59538348SEric.Yu@Sun.COM 	if (error != 0 && did_bind) {
59548348SEric.Yu@Sun.COM 		int unbind_err;
59558348SEric.Yu@Sun.COM 
59568348SEric.Yu@Sun.COM 		unbind_err = rawip_unbind(connp);
59578348SEric.Yu@Sun.COM 		ASSERT(unbind_err == 0);
59588348SEric.Yu@Sun.COM 	}
59598348SEric.Yu@Sun.COM 
59608348SEric.Yu@Sun.COM 	if (error == 0) {
59618348SEric.Yu@Sun.COM 		*id = 0;
59628348SEric.Yu@Sun.COM 		(*connp->conn_upcalls->su_connected)
59638348SEric.Yu@Sun.COM 		    (connp->conn_upper_handle, 0, NULL, -1);
59648348SEric.Yu@Sun.COM 	} else if (error < 0) {
59658348SEric.Yu@Sun.COM 		error = proto_tlitosyserr(-error);
59668348SEric.Yu@Sun.COM 	}
59678348SEric.Yu@Sun.COM 	return (error);
59688348SEric.Yu@Sun.COM }
59698348SEric.Yu@Sun.COM 
59708348SEric.Yu@Sun.COM /* ARGSUSED */
59718348SEric.Yu@Sun.COM void
59728348SEric.Yu@Sun.COM rawip_fallback(sock_lower_handle_t proto_handle, queue_t *q,
59738348SEric.Yu@Sun.COM     boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb)
59748348SEric.Yu@Sun.COM {
59758348SEric.Yu@Sun.COM 	conn_t  *connp = (conn_t *)proto_handle;
59768348SEric.Yu@Sun.COM 	icmp_t	*icmp;
59778348SEric.Yu@Sun.COM 	struct T_capability_ack tca;
59788348SEric.Yu@Sun.COM 	struct sockaddr_in6 laddr, faddr;
59798348SEric.Yu@Sun.COM 	socklen_t laddrlen, faddrlen;
59808348SEric.Yu@Sun.COM 	short opts;
59818348SEric.Yu@Sun.COM 	struct stroptions *stropt;
59828348SEric.Yu@Sun.COM 	mblk_t *stropt_mp;
59838348SEric.Yu@Sun.COM 	int error;
59848348SEric.Yu@Sun.COM 
59858348SEric.Yu@Sun.COM 	icmp = connp->conn_icmp;
59868348SEric.Yu@Sun.COM 
59878348SEric.Yu@Sun.COM 	stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);
59888348SEric.Yu@Sun.COM 
59898348SEric.Yu@Sun.COM 	/*
59908348SEric.Yu@Sun.COM 	 * setup the fallback stream that was allocated
59918348SEric.Yu@Sun.COM 	 */
59928348SEric.Yu@Sun.COM 	connp->conn_dev = (dev_t)RD(q)->q_ptr;
59938348SEric.Yu@Sun.COM 	connp->conn_minor_arena = WR(q)->q_ptr;
59948348SEric.Yu@Sun.COM 
59958348SEric.Yu@Sun.COM 	RD(q)->q_ptr = WR(q)->q_ptr = connp;
59968348SEric.Yu@Sun.COM 
59978348SEric.Yu@Sun.COM 	WR(q)->q_qinfo = &icmpwinit;
59988348SEric.Yu@Sun.COM 
59998348SEric.Yu@Sun.COM 	connp->conn_rq = RD(q);
60008348SEric.Yu@Sun.COM 	connp->conn_wq = WR(q);
60018348SEric.Yu@Sun.COM 
60028348SEric.Yu@Sun.COM 	/* Notify stream head about options before sending up data */
60038348SEric.Yu@Sun.COM 	stropt_mp->b_datap->db_type = M_SETOPTS;
60048348SEric.Yu@Sun.COM 	stropt_mp->b_wptr += sizeof (*stropt);
60058348SEric.Yu@Sun.COM 	stropt = (struct stroptions *)stropt_mp->b_rptr;
60068348SEric.Yu@Sun.COM 	stropt->so_flags = SO_WROFF | SO_HIWAT;
60078348SEric.Yu@Sun.COM 	stropt->so_wroff =
60088348SEric.Yu@Sun.COM 	    (ushort_t)(icmp->icmp_max_hdr_len + icmp->icmp_is->is_wroff_extra);
60098348SEric.Yu@Sun.COM 	stropt->so_hiwat = icmp->icmp_recv_hiwat;
60108348SEric.Yu@Sun.COM 	putnext(RD(q), stropt_mp);
60118348SEric.Yu@Sun.COM 
60128348SEric.Yu@Sun.COM 	/*
60138348SEric.Yu@Sun.COM 	 * free helper stream
60148348SEric.Yu@Sun.COM 	 */
60158477SRao.Shoaib@Sun.COM 	ip_free_helper_stream(connp);
60168348SEric.Yu@Sun.COM 
60178348SEric.Yu@Sun.COM 	/*
60188348SEric.Yu@Sun.COM 	 * Collect the information needed to sync with the sonode
60198348SEric.Yu@Sun.COM 	 */
60208348SEric.Yu@Sun.COM 	icmp_do_capability_ack(icmp, &tca, TC1_INFO);
60218348SEric.Yu@Sun.COM 
60228348SEric.Yu@Sun.COM 	laddrlen = faddrlen = sizeof (sin6_t);
60238348SEric.Yu@Sun.COM 	(void) rawip_getsockname((sock_lower_handle_t)connp,
6024*8778SErik.Nordmark@Sun.COM 	    (struct sockaddr *)&laddr, &laddrlen, CRED());
60258348SEric.Yu@Sun.COM 	error = rawip_getpeername((sock_lower_handle_t)connp,
6026*8778SErik.Nordmark@Sun.COM 	    (struct sockaddr *)&faddr, &faddrlen, CRED());
60278348SEric.Yu@Sun.COM 	if (error != 0)
60288348SEric.Yu@Sun.COM 		faddrlen = 0;
60298348SEric.Yu@Sun.COM 	opts = 0;
60308348SEric.Yu@Sun.COM 	if (icmp->icmp_dgram_errind)
60318348SEric.Yu@Sun.COM 		opts |= SO_DGRAM_ERRIND;
60328348SEric.Yu@Sun.COM 	if (icmp->icmp_dontroute)
60338348SEric.Yu@Sun.COM 		opts |= SO_DONTROUTE;
60348348SEric.Yu@Sun.COM 
60358348SEric.Yu@Sun.COM 	/*
60368348SEric.Yu@Sun.COM 	 * Once we grab the drain lock, no data will be send up
60378348SEric.Yu@Sun.COM 	 * to the socket. So we notify the socket that the endpoint
60388348SEric.Yu@Sun.COM 	 * is quiescent and it's therefore safe move data from
60398348SEric.Yu@Sun.COM 	 * the socket to the stream head.
60408348SEric.Yu@Sun.COM 	 */
60418348SEric.Yu@Sun.COM 	(*quiesced_cb)(connp->conn_upper_handle, q, &tca,
60428348SEric.Yu@Sun.COM 	    (struct sockaddr *)&laddr, laddrlen,
60438348SEric.Yu@Sun.COM 	    (struct sockaddr *)&faddr, faddrlen, opts);
60448348SEric.Yu@Sun.COM 
60458348SEric.Yu@Sun.COM 	/*
60468348SEric.Yu@Sun.COM 	 * push up any packets that were queued in icmp_t
60478348SEric.Yu@Sun.COM 	 */
60488348SEric.Yu@Sun.COM 
60498348SEric.Yu@Sun.COM 	mutex_enter(&icmp->icmp_recv_lock);
60508348SEric.Yu@Sun.COM 	while (icmp->icmp_fallback_queue_head != NULL) {
60518348SEric.Yu@Sun.COM 		mblk_t	*mp;
60528348SEric.Yu@Sun.COM 
60538348SEric.Yu@Sun.COM 		mp = icmp->icmp_fallback_queue_head;
60548348SEric.Yu@Sun.COM 		icmp->icmp_fallback_queue_head = mp->b_next;
60558348SEric.Yu@Sun.COM 		mp->b_next = NULL;
60568348SEric.Yu@Sun.COM 		mutex_exit(&icmp->icmp_recv_lock);
60578348SEric.Yu@Sun.COM 		putnext(RD(q), mp);
60588348SEric.Yu@Sun.COM 		mutex_enter(&icmp->icmp_recv_lock);
60598348SEric.Yu@Sun.COM 	}
60608348SEric.Yu@Sun.COM 	icmp->icmp_fallback_queue_tail = icmp->icmp_fallback_queue_head;
60618348SEric.Yu@Sun.COM 	/*
60628348SEric.Yu@Sun.COM 	 * No longer a streams less socket
60638348SEric.Yu@Sun.COM 	 */
60648348SEric.Yu@Sun.COM 	connp->conn_flags &= ~IPCL_NONSTR;
60658348SEric.Yu@Sun.COM 	mutex_exit(&icmp->icmp_recv_lock);
60668348SEric.Yu@Sun.COM 	ASSERT(icmp->icmp_fallback_queue_head == NULL &&
60678348SEric.Yu@Sun.COM 	    icmp->icmp_fallback_queue_tail == NULL);
60688348SEric.Yu@Sun.COM 
60698348SEric.Yu@Sun.COM 	ASSERT(connp->conn_ref >= 1);
60708348SEric.Yu@Sun.COM }
60718348SEric.Yu@Sun.COM 
60728348SEric.Yu@Sun.COM /* ARGSUSED */
60738348SEric.Yu@Sun.COM sock_lower_handle_t
60748348SEric.Yu@Sun.COM rawip_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
60758348SEric.Yu@Sun.COM     uint_t *smodep, int *errorp, int flags, cred_t *credp)
60768348SEric.Yu@Sun.COM {
60778348SEric.Yu@Sun.COM 	conn_t *connp;
60788348SEric.Yu@Sun.COM 
60798348SEric.Yu@Sun.COM 	if (type != SOCK_RAW || (family != AF_INET && family != AF_INET6)) {
60808348SEric.Yu@Sun.COM 		*errorp = EPROTONOSUPPORT;
60818348SEric.Yu@Sun.COM 		return (NULL);
60828348SEric.Yu@Sun.COM 	}
60838348SEric.Yu@Sun.COM 
60848348SEric.Yu@Sun.COM 	connp = icmp_open(family, credp, errorp, flags);
60858348SEric.Yu@Sun.COM 	if (connp != NULL) {
60868348SEric.Yu@Sun.COM 		icmp_stack_t *is;
60878348SEric.Yu@Sun.COM 
60888348SEric.Yu@Sun.COM 		is = connp->conn_icmp->icmp_is;
60898348SEric.Yu@Sun.COM 		connp->conn_flags |= IPCL_NONSTR;
60908348SEric.Yu@Sun.COM 
60918348SEric.Yu@Sun.COM 		if (connp->conn_icmp->icmp_family == AF_INET6) {
60928348SEric.Yu@Sun.COM 			/* Build initial header template for transmit */
60938348SEric.Yu@Sun.COM 			rw_enter(&connp->conn_icmp->icmp_rwlock, RW_WRITER);
60948348SEric.Yu@Sun.COM 			if ((*errorp =
60958348SEric.Yu@Sun.COM 			    icmp_build_hdrs(connp->conn_icmp)) != 0) {
60968348SEric.Yu@Sun.COM 				rw_exit(&connp->conn_icmp->icmp_rwlock);
60978348SEric.Yu@Sun.COM 				ipcl_conn_destroy(connp);
60988348SEric.Yu@Sun.COM 				return (NULL);
60998348SEric.Yu@Sun.COM 			}
61008348SEric.Yu@Sun.COM 			rw_exit(&connp->conn_icmp->icmp_rwlock);
61018348SEric.Yu@Sun.COM 		}
61028348SEric.Yu@Sun.COM 
61038348SEric.Yu@Sun.COM 		connp->conn_icmp->icmp_recv_hiwat = is->is_recv_hiwat;
61048348SEric.Yu@Sun.COM 		connp->conn_icmp->icmp_xmit_hiwat = is->is_xmit_hiwat;
61058348SEric.Yu@Sun.COM 
61068348SEric.Yu@Sun.COM 		if ((*errorp = ip_create_helper_stream(connp,
61078348SEric.Yu@Sun.COM 		    is->is_ldi_ident)) != 0) {
61088348SEric.Yu@Sun.COM 			cmn_err(CE_CONT, "create of IP helper stream failed\n");
61098348SEric.Yu@Sun.COM 			(void) rawip_do_close(connp);
61108348SEric.Yu@Sun.COM 			return (NULL);
61118348SEric.Yu@Sun.COM 		}
61128348SEric.Yu@Sun.COM 
61138348SEric.Yu@Sun.COM 		mutex_enter(&connp->conn_lock);
61148348SEric.Yu@Sun.COM 		connp->conn_state_flags &= ~CONN_INCIPIENT;
61158348SEric.Yu@Sun.COM 		mutex_exit(&connp->conn_lock);
61168348SEric.Yu@Sun.COM 		*sock_downcalls = &sock_rawip_downcalls;
61178348SEric.Yu@Sun.COM 		*smodep = SM_ATOMIC;
61188348SEric.Yu@Sun.COM 	} else {
61198348SEric.Yu@Sun.COM 		ASSERT(*errorp != 0);
61208348SEric.Yu@Sun.COM 	}
61218348SEric.Yu@Sun.COM 
61228348SEric.Yu@Sun.COM 	return ((sock_lower_handle_t)connp);
61238348SEric.Yu@Sun.COM }
61248348SEric.Yu@Sun.COM 
61258348SEric.Yu@Sun.COM /* ARGSUSED */
61268348SEric.Yu@Sun.COM void
61278348SEric.Yu@Sun.COM rawip_activate(sock_lower_handle_t proto_handle,
61288348SEric.Yu@Sun.COM     sock_upper_handle_t sock_handle, sock_upcalls_t *sock_upcalls, int flags,
61298348SEric.Yu@Sun.COM     cred_t *cr)
61308348SEric.Yu@Sun.COM {
61318348SEric.Yu@Sun.COM 	conn_t 			*connp = (conn_t *)proto_handle;
61328348SEric.Yu@Sun.COM 	icmp_stack_t 		*is = connp->conn_icmp->icmp_is;
61338348SEric.Yu@Sun.COM 	struct sock_proto_props sopp;
61348348SEric.Yu@Sun.COM 
6135*8778SErik.Nordmark@Sun.COM 	/* All Solaris components should pass a cred for this operation. */
6136*8778SErik.Nordmark@Sun.COM 	ASSERT(cr != NULL);
6137*8778SErik.Nordmark@Sun.COM 
61388348SEric.Yu@Sun.COM 	connp->conn_upcalls = sock_upcalls;
61398348SEric.Yu@Sun.COM 	connp->conn_upper_handle = sock_handle;
61408348SEric.Yu@Sun.COM 
61418348SEric.Yu@Sun.COM 	sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT |
61428348SEric.Yu@Sun.COM 	    SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ;
61438348SEric.Yu@Sun.COM 	sopp.sopp_wroff = connp->conn_icmp->icmp_max_hdr_len +
61448348SEric.Yu@Sun.COM 	    is->is_wroff_extra;
61458348SEric.Yu@Sun.COM 	sopp.sopp_rxhiwat = is->is_recv_hiwat;
61468348SEric.Yu@Sun.COM 	sopp.sopp_rxlowat = icmp_mod_info.mi_lowat;
61478348SEric.Yu@Sun.COM 	sopp.sopp_maxblk = INFPSZ;
61488348SEric.Yu@Sun.COM 	sopp.sopp_maxpsz = IP_MAXPACKET;
61498348SEric.Yu@Sun.COM 	sopp.sopp_minpsz = (icmp_mod_info.mi_minpsz == 1) ? 0 :
61508348SEric.Yu@Sun.COM 	    icmp_mod_info.mi_minpsz;
61518348SEric.Yu@Sun.COM 
61528348SEric.Yu@Sun.COM 	(*connp->conn_upcalls->su_set_proto_props)
61538348SEric.Yu@Sun.COM 	    (connp->conn_upper_handle, &sopp);
61548348SEric.Yu@Sun.COM }
61558348SEric.Yu@Sun.COM 
61568348SEric.Yu@Sun.COM static int
61578348SEric.Yu@Sun.COM rawip_do_getsockname(icmp_t *icmp, struct sockaddr *sa, uint_t *salenp)
61588348SEric.Yu@Sun.COM {
61598348SEric.Yu@Sun.COM 	sin_t	*sin = (sin_t *)sa;
61608348SEric.Yu@Sun.COM 	sin6_t	*sin6 = (sin6_t *)sa;
61618348SEric.Yu@Sun.COM 
61628348SEric.Yu@Sun.COM 	ASSERT(icmp != NULL);
61638348SEric.Yu@Sun.COM 	ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock));
61648348SEric.Yu@Sun.COM 
61658348SEric.Yu@Sun.COM 	switch (icmp->icmp_family) {
61668348SEric.Yu@Sun.COM 	case AF_INET:
61678348SEric.Yu@Sun.COM 		ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
61688348SEric.Yu@Sun.COM 		if (*salenp < sizeof (sin_t))
61698348SEric.Yu@Sun.COM 			return (EINVAL);
61708348SEric.Yu@Sun.COM 
61718348SEric.Yu@Sun.COM 		*salenp = sizeof (sin_t);
61728348SEric.Yu@Sun.COM 		*sin = sin_null;
61738348SEric.Yu@Sun.COM 		sin->sin_family = AF_INET;
61748348SEric.Yu@Sun.COM 		if (icmp->icmp_state == TS_UNBND) {
61758348SEric.Yu@Sun.COM 			break;
61768348SEric.Yu@Sun.COM 		}
61778348SEric.Yu@Sun.COM 
61788348SEric.Yu@Sun.COM 		if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) &&
61798348SEric.Yu@Sun.COM 		    !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) {
61808348SEric.Yu@Sun.COM 			sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_v6src);
61818348SEric.Yu@Sun.COM 		} else {
61828348SEric.Yu@Sun.COM 			/*
61838348SEric.Yu@Sun.COM 			 * INADDR_ANY
61848348SEric.Yu@Sun.COM 			 * icmp_v6src is not set, we might be bound to
61858348SEric.Yu@Sun.COM 			 * broadcast/multicast. Use icmp_bound_v6src as
61868348SEric.Yu@Sun.COM 			 * local address instead (that could
61878348SEric.Yu@Sun.COM 			 * also still be INADDR_ANY)
61888348SEric.Yu@Sun.COM 			 */
61898348SEric.Yu@Sun.COM 			sin->sin_addr.s_addr =
61908348SEric.Yu@Sun.COM 			    V4_PART_OF_V6(icmp->icmp_bound_v6src);
61918348SEric.Yu@Sun.COM 		}
61928348SEric.Yu@Sun.COM 		break;
61938348SEric.Yu@Sun.COM 	case AF_INET6:
61948348SEric.Yu@Sun.COM 
61958348SEric.Yu@Sun.COM 		if (*salenp < sizeof (sin6_t))
61968348SEric.Yu@Sun.COM 			return (EINVAL);
61978348SEric.Yu@Sun.COM 
61988348SEric.Yu@Sun.COM 		*salenp = sizeof (sin6_t);
61998348SEric.Yu@Sun.COM 		*sin6 = sin6_null;
62008348SEric.Yu@Sun.COM 		sin6->sin6_family = AF_INET6;
62018348SEric.Yu@Sun.COM 		if (icmp->icmp_state == TS_UNBND) {
62028348SEric.Yu@Sun.COM 			break;
62038348SEric.Yu@Sun.COM 		}
62048348SEric.Yu@Sun.COM 		if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) {
62058348SEric.Yu@Sun.COM 			sin6->sin6_addr = icmp->icmp_v6src;
62068348SEric.Yu@Sun.COM 		} else {
62078348SEric.Yu@Sun.COM 			/*
62088348SEric.Yu@Sun.COM 			 * UNSPECIFIED
62098348SEric.Yu@Sun.COM 			 * icmp_v6src is not set, we might be bound to
62108348SEric.Yu@Sun.COM 			 * broadcast/multicast. Use icmp_bound_v6src as
62118348SEric.Yu@Sun.COM 			 * local address instead (that could
62128348SEric.Yu@Sun.COM 			 * also still be UNSPECIFIED)
62138348SEric.Yu@Sun.COM 			 */
62148348SEric.Yu@Sun.COM 
62158348SEric.Yu@Sun.COM 			sin6->sin6_addr = icmp->icmp_bound_v6src;
62168348SEric.Yu@Sun.COM 		}
62178348SEric.Yu@Sun.COM 		break;
62188348SEric.Yu@Sun.COM 	}
62198348SEric.Yu@Sun.COM 	return (0);
62208348SEric.Yu@Sun.COM }
62218348SEric.Yu@Sun.COM 
62228348SEric.Yu@Sun.COM static int
62238348SEric.Yu@Sun.COM rawip_do_getpeername(icmp_t *icmp, struct sockaddr *sa, uint_t *salenp)
62248348SEric.Yu@Sun.COM {
62258348SEric.Yu@Sun.COM 	sin_t   *sin = (sin_t *)sa;
62268348SEric.Yu@Sun.COM 	sin6_t  *sin6 = (sin6_t *)sa;
62278348SEric.Yu@Sun.COM 
62288348SEric.Yu@Sun.COM 	ASSERT(icmp != NULL);
62298348SEric.Yu@Sun.COM 	ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock));
62308348SEric.Yu@Sun.COM 
62318348SEric.Yu@Sun.COM 	if (icmp->icmp_state != TS_DATA_XFER)
62328348SEric.Yu@Sun.COM 		return (ENOTCONN);
62338348SEric.Yu@Sun.COM 
62348348SEric.Yu@Sun.COM 	sa->sa_family = icmp->icmp_family;
62358348SEric.Yu@Sun.COM 	switch (icmp->icmp_family) {
62368348SEric.Yu@Sun.COM 	case AF_INET:
62378348SEric.Yu@Sun.COM 		ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
62388348SEric.Yu@Sun.COM 
62398348SEric.Yu@Sun.COM 		if (*salenp < sizeof (sin_t))
62408348SEric.Yu@Sun.COM 			return (EINVAL);
62418348SEric.Yu@Sun.COM 
62428348SEric.Yu@Sun.COM 		*salenp = sizeof (sin_t);
62438348SEric.Yu@Sun.COM 		*sin = sin_null;
62448348SEric.Yu@Sun.COM 		sin->sin_family = AF_INET;
62458348SEric.Yu@Sun.COM 		sin->sin_addr.s_addr =
62468348SEric.Yu@Sun.COM 		    V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr);
62478348SEric.Yu@Sun.COM 		break;
62488348SEric.Yu@Sun.COM 	case AF_INET6:
62498348SEric.Yu@Sun.COM 		if (*salenp < sizeof (sin6_t))
62508348SEric.Yu@Sun.COM 			return (EINVAL);
62518348SEric.Yu@Sun.COM 
62528348SEric.Yu@Sun.COM 		*salenp = sizeof (sin6_t);
62538348SEric.Yu@Sun.COM 		*sin6 = sin6_null;
62548348SEric.Yu@Sun.COM 		*sin6 = icmp->icmp_v6dst;
62558348SEric.Yu@Sun.COM 		break;
62568348SEric.Yu@Sun.COM 	}
62578348SEric.Yu@Sun.COM 	return (0);
62588348SEric.Yu@Sun.COM }
62598348SEric.Yu@Sun.COM 
62608348SEric.Yu@Sun.COM /* ARGSUSED */
62618348SEric.Yu@Sun.COM int
62628348SEric.Yu@Sun.COM rawip_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa,
62638348SEric.Yu@Sun.COM     socklen_t *salenp, cred_t *cr)
62648348SEric.Yu@Sun.COM {
62658348SEric.Yu@Sun.COM 	conn_t  *connp = (conn_t *)proto_handle;
62668348SEric.Yu@Sun.COM 	icmp_t  *icmp = connp->conn_icmp;
62678348SEric.Yu@Sun.COM 	int	error;
62688348SEric.Yu@Sun.COM 
6269*8778SErik.Nordmark@Sun.COM 	/* All Solaris components should pass a cred for this operation. */
6270*8778SErik.Nordmark@Sun.COM 	ASSERT(cr != NULL);
6271*8778SErik.Nordmark@Sun.COM 
62728348SEric.Yu@Sun.COM 	ASSERT(icmp != NULL);
62738348SEric.Yu@Sun.COM 
62748348SEric.Yu@Sun.COM 	rw_enter(&icmp->icmp_rwlock, RW_READER);
62758348SEric.Yu@Sun.COM 
62768348SEric.Yu@Sun.COM 	error = rawip_do_getpeername(icmp, sa, salenp);
62778348SEric.Yu@Sun.COM 
62788348SEric.Yu@Sun.COM 	rw_exit(&icmp->icmp_rwlock);
62798348SEric.Yu@Sun.COM 
62808348SEric.Yu@Sun.COM 	return (error);
62818348SEric.Yu@Sun.COM }
62828348SEric.Yu@Sun.COM 
62838348SEric.Yu@Sun.COM /* ARGSUSED */
62848348SEric.Yu@Sun.COM int
62858348SEric.Yu@Sun.COM rawip_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa,
62868348SEric.Yu@Sun.COM     socklen_t *salenp, cred_t *cr)
62878348SEric.Yu@Sun.COM {
62888348SEric.Yu@Sun.COM 	conn_t  *connp = (conn_t *)proto_handle;
62898348SEric.Yu@Sun.COM 	icmp_t	*icmp = connp->conn_icmp;
62908348SEric.Yu@Sun.COM 	int	error;
62918348SEric.Yu@Sun.COM 
6292*8778SErik.Nordmark@Sun.COM 	/* All Solaris components should pass a cred for this operation. */
6293*8778SErik.Nordmark@Sun.COM 	ASSERT(cr != NULL);
6294*8778SErik.Nordmark@Sun.COM 
62958348SEric.Yu@Sun.COM 	ASSERT(icmp != NULL);
62968348SEric.Yu@Sun.COM 	rw_enter(&icmp->icmp_rwlock, RW_READER);
62978348SEric.Yu@Sun.COM 
62988348SEric.Yu@Sun.COM 	error = rawip_do_getsockname(icmp, sa, salenp);
62998348SEric.Yu@Sun.COM 
63008348SEric.Yu@Sun.COM 	rw_exit(&icmp->icmp_rwlock);
63018348SEric.Yu@Sun.COM 
63028348SEric.Yu@Sun.COM 	return (error);
63038348SEric.Yu@Sun.COM }
63048348SEric.Yu@Sun.COM 
63058348SEric.Yu@Sun.COM int
63068348SEric.Yu@Sun.COM rawip_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
63078348SEric.Yu@Sun.COM     const void *optvalp, socklen_t optlen, cred_t *cr)
63088348SEric.Yu@Sun.COM {
63098348SEric.Yu@Sun.COM 	conn_t	*connp = (conn_t *)proto_handle;
63108348SEric.Yu@Sun.COM 	icmp_t *icmp = connp->conn_icmp;
63118348SEric.Yu@Sun.COM 	int error;
63128348SEric.Yu@Sun.COM 
6313*8778SErik.Nordmark@Sun.COM 	/* All Solaris components should pass a cred for this operation. */
6314*8778SErik.Nordmark@Sun.COM 	ASSERT(cr != NULL);
6315*8778SErik.Nordmark@Sun.COM 
63168348SEric.Yu@Sun.COM 	error = proto_opt_check(level, option_name, optlen, NULL,
63178348SEric.Yu@Sun.COM 	    icmp_opt_obj.odb_opt_des_arr,
63188348SEric.Yu@Sun.COM 	    icmp_opt_obj.odb_opt_arr_cnt,
63198348SEric.Yu@Sun.COM 	    icmp_opt_obj.odb_topmost_tpiprovider,
63208348SEric.Yu@Sun.COM 	    B_TRUE, B_FALSE, cr);
63218348SEric.Yu@Sun.COM 
63228348SEric.Yu@Sun.COM 	if (error != 0) {
63238348SEric.Yu@Sun.COM 		/*
63248348SEric.Yu@Sun.COM 		 * option not recognized
63258348SEric.Yu@Sun.COM 		 */
63268348SEric.Yu@Sun.COM 		if (error < 0) {
63278348SEric.Yu@Sun.COM 			error = proto_tlitosyserr(-error);
63288348SEric.Yu@Sun.COM 		}
63298348SEric.Yu@Sun.COM 		return (error);
63308348SEric.Yu@Sun.COM 	}
63318348SEric.Yu@Sun.COM 
63328348SEric.Yu@Sun.COM 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
63338348SEric.Yu@Sun.COM 	error = icmp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level,
63348348SEric.Yu@Sun.COM 	    option_name, optlen, (uchar_t *)optvalp, (uint_t *)&optlen,
63358348SEric.Yu@Sun.COM 	    (uchar_t *)optvalp, NULL, cr);
63368348SEric.Yu@Sun.COM 	rw_exit(&icmp->icmp_rwlock);
63378348SEric.Yu@Sun.COM 
63388348SEric.Yu@Sun.COM 	if (error < 0) {
63398348SEric.Yu@Sun.COM 		/*
63408348SEric.Yu@Sun.COM 		 * Pass on to ip
63418348SEric.Yu@Sun.COM 		 */
63428348SEric.Yu@Sun.COM 		error = ip_set_options(connp, level, option_name, optvalp,
63438348SEric.Yu@Sun.COM 		    optlen, cr);
63448348SEric.Yu@Sun.COM 	}
63458348SEric.Yu@Sun.COM 
63468348SEric.Yu@Sun.COM 	ASSERT(error >= 0);
63478348SEric.Yu@Sun.COM 
63488348SEric.Yu@Sun.COM 	return (error);
63498348SEric.Yu@Sun.COM }
63508348SEric.Yu@Sun.COM 
63518348SEric.Yu@Sun.COM int
63528348SEric.Yu@Sun.COM rawip_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
63538348SEric.Yu@Sun.COM     void *optvalp, socklen_t *optlen, cred_t *cr)
63548348SEric.Yu@Sun.COM {
63558348SEric.Yu@Sun.COM 	int		error;
63568348SEric.Yu@Sun.COM 	conn_t		*connp = (conn_t *)proto_handle;
63578348SEric.Yu@Sun.COM 	icmp_t		*icmp = connp->conn_icmp;
63588348SEric.Yu@Sun.COM 	t_uscalar_t	max_optbuf_len;
63598348SEric.Yu@Sun.COM 	void		*optvalp_buf;
63608348SEric.Yu@Sun.COM 	int		len;
63618348SEric.Yu@Sun.COM 
6362*8778SErik.Nordmark@Sun.COM 	/* All Solaris components should pass a cred for this operation. */
6363*8778SErik.Nordmark@Sun.COM 	ASSERT(cr != NULL);
6364*8778SErik.Nordmark@Sun.COM 
63658348SEric.Yu@Sun.COM 	error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
63668348SEric.Yu@Sun.COM 	    icmp_opt_obj.odb_opt_des_arr,
63678348SEric.Yu@Sun.COM 	    icmp_opt_obj.odb_opt_arr_cnt,
63688348SEric.Yu@Sun.COM 	    icmp_opt_obj.odb_topmost_tpiprovider,
63698348SEric.Yu@Sun.COM 	    B_FALSE, B_TRUE, cr);
63708348SEric.Yu@Sun.COM 
63718348SEric.Yu@Sun.COM 	if (error != 0) {
63728348SEric.Yu@Sun.COM 		if (error < 0) {
63738348SEric.Yu@Sun.COM 			error = proto_tlitosyserr(-error);
63748348SEric.Yu@Sun.COM 		}
63758348SEric.Yu@Sun.COM 		return (error);
63768348SEric.Yu@Sun.COM 	}
63778348SEric.Yu@Sun.COM 
63788348SEric.Yu@Sun.COM 	optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
63798348SEric.Yu@Sun.COM 	rw_enter(&icmp->icmp_rwlock, RW_READER);
63808348SEric.Yu@Sun.COM 	len = icmp_opt_get(connp, level, option_name, optvalp_buf);
63818348SEric.Yu@Sun.COM 	rw_exit(&icmp->icmp_rwlock);
63828348SEric.Yu@Sun.COM 
63838348SEric.Yu@Sun.COM 	if (len < 0) {
63848348SEric.Yu@Sun.COM 		/*
63858348SEric.Yu@Sun.COM 		 * Pass on to IP
63868348SEric.Yu@Sun.COM 		 */
63878348SEric.Yu@Sun.COM 		kmem_free(optvalp_buf, max_optbuf_len);
63888348SEric.Yu@Sun.COM 		return (ip_get_options(connp, level, option_name, optvalp,
63898348SEric.Yu@Sun.COM 		    optlen, cr));
63908348SEric.Yu@Sun.COM 	} else {
63918348SEric.Yu@Sun.COM 		/*
63928348SEric.Yu@Sun.COM 		 * update optlen and copy option value
63938348SEric.Yu@Sun.COM 		 */
63948348SEric.Yu@Sun.COM 		t_uscalar_t size = MIN(len, *optlen);
63958348SEric.Yu@Sun.COM 		bcopy(optvalp_buf, optvalp, size);
63968348SEric.Yu@Sun.COM 		bcopy(&size, optlen, sizeof (size));
63978348SEric.Yu@Sun.COM 
63988348SEric.Yu@Sun.COM 		kmem_free(optvalp_buf, max_optbuf_len);
63998348SEric.Yu@Sun.COM 		return (0);
64008348SEric.Yu@Sun.COM 	}
64018348SEric.Yu@Sun.COM }
64028348SEric.Yu@Sun.COM 
64038348SEric.Yu@Sun.COM /* ARGSUSED */
64048348SEric.Yu@Sun.COM int
64058348SEric.Yu@Sun.COM rawip_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
64068348SEric.Yu@Sun.COM {
64078348SEric.Yu@Sun.COM 	conn_t	*connp = (conn_t *)proto_handle;
6408*8778SErik.Nordmark@Sun.COM 
6409*8778SErik.Nordmark@Sun.COM 	/* All Solaris components should pass a cred for this operation. */
6410*8778SErik.Nordmark@Sun.COM 	ASSERT(cr != NULL);
6411*8778SErik.Nordmark@Sun.COM 
64128348SEric.Yu@Sun.COM 	(void) rawip_do_close(connp);
64138348SEric.Yu@Sun.COM 	return (0);
64148348SEric.Yu@Sun.COM }
64158348SEric.Yu@Sun.COM 
64168348SEric.Yu@Sun.COM /* ARGSUSED */
64178348SEric.Yu@Sun.COM int
64188348SEric.Yu@Sun.COM rawip_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
64198348SEric.Yu@Sun.COM {
64208348SEric.Yu@Sun.COM 	conn_t  *connp = (conn_t *)proto_handle;
64218348SEric.Yu@Sun.COM 
6422*8778SErik.Nordmark@Sun.COM 	/* All Solaris components should pass a cred for this operation. */
6423*8778SErik.Nordmark@Sun.COM 	ASSERT(cr != NULL);
6424*8778SErik.Nordmark@Sun.COM 
64258348SEric.Yu@Sun.COM 	/* shut down the send side */
64268348SEric.Yu@Sun.COM 	if (how != SHUT_RD)
64278348SEric.Yu@Sun.COM 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
64288348SEric.Yu@Sun.COM 		    SOCK_OPCTL_SHUT_SEND, 0);
64298348SEric.Yu@Sun.COM 	/* shut down the recv side */
64308348SEric.Yu@Sun.COM 	if (how != SHUT_WR)
64318348SEric.Yu@Sun.COM 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
64328348SEric.Yu@Sun.COM 		    SOCK_OPCTL_SHUT_RECV, 0);
64338348SEric.Yu@Sun.COM 	return (0);
64348348SEric.Yu@Sun.COM }
64358348SEric.Yu@Sun.COM 
64368348SEric.Yu@Sun.COM void
64378348SEric.Yu@Sun.COM rawip_clr_flowctrl(sock_lower_handle_t proto_handle)
64388348SEric.Yu@Sun.COM {
64398348SEric.Yu@Sun.COM 	conn_t  *connp = (conn_t *)proto_handle;
64408348SEric.Yu@Sun.COM 	icmp_t	*icmp = connp->conn_icmp;
64418348SEric.Yu@Sun.COM 
64428348SEric.Yu@Sun.COM 	mutex_enter(&icmp->icmp_recv_lock);
64438348SEric.Yu@Sun.COM 	connp->conn_flow_cntrld = B_FALSE;
64448348SEric.Yu@Sun.COM 	mutex_exit(&icmp->icmp_recv_lock);
64458348SEric.Yu@Sun.COM }
64468348SEric.Yu@Sun.COM 
64478348SEric.Yu@Sun.COM int
64488348SEric.Yu@Sun.COM rawip_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
64498348SEric.Yu@Sun.COM     int mode, int32_t *rvalp, cred_t *cr)
64508348SEric.Yu@Sun.COM {
64518348SEric.Yu@Sun.COM 	conn_t  	*connp = (conn_t *)proto_handle;
64528348SEric.Yu@Sun.COM 	int		error;
64538348SEric.Yu@Sun.COM 
6454*8778SErik.Nordmark@Sun.COM 	/* All Solaris components should pass a cred for this operation. */
6455*8778SErik.Nordmark@Sun.COM 	ASSERT(cr != NULL);
6456*8778SErik.Nordmark@Sun.COM 
64578348SEric.Yu@Sun.COM 	switch (cmd) {
64588348SEric.Yu@Sun.COM 	case ND_SET:
64598348SEric.Yu@Sun.COM 	case ND_GET:
64608348SEric.Yu@Sun.COM 	case _SIOCSOCKFALLBACK:
64618348SEric.Yu@Sun.COM 	case TI_GETPEERNAME:
64628348SEric.Yu@Sun.COM 	case TI_GETMYNAME:
64638348SEric.Yu@Sun.COM #ifdef DEBUG
64648348SEric.Yu@Sun.COM 		cmn_err(CE_CONT, "icmp_ioctl cmd 0x%x on non streams"
64658348SEric.Yu@Sun.COM 		    " socket", cmd);
64668348SEric.Yu@Sun.COM #endif
64678348SEric.Yu@Sun.COM 		error = EINVAL;
64688348SEric.Yu@Sun.COM 		break;
64698348SEric.Yu@Sun.COM 	default:
64708348SEric.Yu@Sun.COM 		/*
64718348SEric.Yu@Sun.COM 		 * Pass on to IP using helper stream
64728348SEric.Yu@Sun.COM 		 */
64738444SRao.Shoaib@Sun.COM 		error = ldi_ioctl(connp->conn_helper_info->iphs_handle,
64748348SEric.Yu@Sun.COM 		    cmd, arg, mode, cr, rvalp);
64758348SEric.Yu@Sun.COM 		break;
64768348SEric.Yu@Sun.COM 	}
64778348SEric.Yu@Sun.COM 	return (error);
64788348SEric.Yu@Sun.COM }
64798348SEric.Yu@Sun.COM 
64808348SEric.Yu@Sun.COM /* ARGSUSED */
64818348SEric.Yu@Sun.COM int
64828348SEric.Yu@Sun.COM rawip_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
64838348SEric.Yu@Sun.COM     cred_t *cr)
64848348SEric.Yu@Sun.COM {
64858348SEric.Yu@Sun.COM 	conn_t *connp = (conn_t *)proto_handle;
64868348SEric.Yu@Sun.COM 	icmp_t	*icmp = connp->conn_icmp;
64878348SEric.Yu@Sun.COM 	icmp_stack_t *is = icmp->icmp_is;
64888348SEric.Yu@Sun.COM 	int error = 0;
64898348SEric.Yu@Sun.COM 	boolean_t bypass_dgram_errind = B_FALSE;
64908348SEric.Yu@Sun.COM 
64918348SEric.Yu@Sun.COM 	ASSERT(DB_TYPE(mp) == M_DATA);
64928348SEric.Yu@Sun.COM 
6493*8778SErik.Nordmark@Sun.COM 	/* All Solaris components should pass a cred for this operation. */
6494*8778SErik.Nordmark@Sun.COM 	ASSERT(cr != NULL);
6495*8778SErik.Nordmark@Sun.COM 
6496*8778SErik.Nordmark@Sun.COM 	/* If labeled then sockfs should have already set db_credp */
6497*8778SErik.Nordmark@Sun.COM 	ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL);
64988348SEric.Yu@Sun.COM 
64998348SEric.Yu@Sun.COM 	/* do an implicit bind if necessary */
65008348SEric.Yu@Sun.COM 	if (icmp->icmp_state == TS_UNBND) {
65018348SEric.Yu@Sun.COM 		error = rawip_implicit_bind(connp);
65028348SEric.Yu@Sun.COM 		/*
65038348SEric.Yu@Sun.COM 		 * We could be racing with an actual bind, in which case
65048348SEric.Yu@Sun.COM 		 * we would see EPROTO. We cross our fingers and try
65058348SEric.Yu@Sun.COM 		 * to connect.
65068348SEric.Yu@Sun.COM 		 */
65078348SEric.Yu@Sun.COM 		if (!(error == 0 || error == EPROTO)) {
65088348SEric.Yu@Sun.COM 			freemsg(mp);
65098348SEric.Yu@Sun.COM 			return (error);
65108348SEric.Yu@Sun.COM 		}
65118348SEric.Yu@Sun.COM 	}
65128348SEric.Yu@Sun.COM 
65138348SEric.Yu@Sun.COM 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
65148348SEric.Yu@Sun.COM 
65158348SEric.Yu@Sun.COM 	if (msg->msg_name != NULL && icmp->icmp_state == TS_DATA_XFER) {
65168348SEric.Yu@Sun.COM 		error = EISCONN;
65178348SEric.Yu@Sun.COM 		goto done_lock;
65188348SEric.Yu@Sun.COM 	}
65198348SEric.Yu@Sun.COM 
65208348SEric.Yu@Sun.COM 	switch (icmp->icmp_family) {
65218348SEric.Yu@Sun.COM 	case AF_INET6: {
65228348SEric.Yu@Sun.COM 		sin6_t	*sin6;
65238348SEric.Yu@Sun.COM 		ip6_pkt_t	ipp_s;	/* For ancillary data options */
65248348SEric.Yu@Sun.COM 		ip6_pkt_t	*ipp = &ipp_s;
65258348SEric.Yu@Sun.COM 
65268348SEric.Yu@Sun.COM 		sin6 = (sin6_t *)msg->msg_name;
65278348SEric.Yu@Sun.COM 		if (sin6 != NULL) {
65288348SEric.Yu@Sun.COM 			error = proto_verify_ip_addr(icmp->icmp_family,
65298348SEric.Yu@Sun.COM 			    (struct sockaddr *)msg->msg_name, msg->msg_namelen);
65308348SEric.Yu@Sun.COM 			if (error != 0) {
65318348SEric.Yu@Sun.COM 				bypass_dgram_errind = B_TRUE;
65328348SEric.Yu@Sun.COM 				goto done_lock;
65338348SEric.Yu@Sun.COM 			}
65348348SEric.Yu@Sun.COM 			if (icmp->icmp_delayed_error != 0) {
65358348SEric.Yu@Sun.COM 				sin6_t  *sin1 = (sin6_t *)msg->msg_name;
65368348SEric.Yu@Sun.COM 				sin6_t  *sin2 = (sin6_t *)
65378348SEric.Yu@Sun.COM 				    &icmp->icmp_delayed_addr;
65388348SEric.Yu@Sun.COM 
65398348SEric.Yu@Sun.COM 				error = icmp->icmp_delayed_error;
65408348SEric.Yu@Sun.COM 				icmp->icmp_delayed_error = 0;
65418348SEric.Yu@Sun.COM 
65428348SEric.Yu@Sun.COM 				/* Compare IP address and port */
65438348SEric.Yu@Sun.COM 
65448348SEric.Yu@Sun.COM 				if (sin1->sin6_port == sin2->sin6_port &&
65458348SEric.Yu@Sun.COM 				    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
65468348SEric.Yu@Sun.COM 				    &sin2->sin6_addr)) {
65478348SEric.Yu@Sun.COM 					goto done_lock;
65488348SEric.Yu@Sun.COM 				}
65498348SEric.Yu@Sun.COM 			}
65508348SEric.Yu@Sun.COM 		} else {
65518348SEric.Yu@Sun.COM 			/*
65528348SEric.Yu@Sun.COM 			 * Use connected address
65538348SEric.Yu@Sun.COM 			 */
65548348SEric.Yu@Sun.COM 			if (icmp->icmp_state != TS_DATA_XFER) {
65558348SEric.Yu@Sun.COM 				BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
65568348SEric.Yu@Sun.COM 				error = EDESTADDRREQ;
65578348SEric.Yu@Sun.COM 				bypass_dgram_errind = B_TRUE;
65588348SEric.Yu@Sun.COM 				goto done_lock;
65598348SEric.Yu@Sun.COM 			}
65608348SEric.Yu@Sun.COM 			sin6 = &icmp->icmp_v6dst;
65618348SEric.Yu@Sun.COM 		}
65628348SEric.Yu@Sun.COM 
65638348SEric.Yu@Sun.COM 		/* No support for mapped addresses on raw sockets */
65648348SEric.Yu@Sun.COM 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
65658348SEric.Yu@Sun.COM 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
65668348SEric.Yu@Sun.COM 			error = EADDRNOTAVAIL;
65678348SEric.Yu@Sun.COM 			goto done_lock;
65688348SEric.Yu@Sun.COM 		}
65698348SEric.Yu@Sun.COM 
65708348SEric.Yu@Sun.COM 		ipp->ipp_fields = 0;
65718348SEric.Yu@Sun.COM 		ipp->ipp_sticky_ignored = 0;
65728348SEric.Yu@Sun.COM 
65738348SEric.Yu@Sun.COM 		/*
65748348SEric.Yu@Sun.COM 		 * If options passed in, feed it for verification and handling
65758348SEric.Yu@Sun.COM 		 */
65768348SEric.Yu@Sun.COM 		if (msg->msg_controllen != 0) {
65778348SEric.Yu@Sun.COM 			error = process_auxiliary_options(connp,
65788348SEric.Yu@Sun.COM 			    msg->msg_control, msg->msg_controllen,
6579*8778SErik.Nordmark@Sun.COM 			    ipp, &icmp_opt_obj, icmp_opt_set, cr);
65808348SEric.Yu@Sun.COM 			if (error != 0) {
65818348SEric.Yu@Sun.COM 				goto done_lock;
65828348SEric.Yu@Sun.COM 			}
65838348SEric.Yu@Sun.COM 		}
65848348SEric.Yu@Sun.COM 
65858348SEric.Yu@Sun.COM 		rw_exit(&icmp->icmp_rwlock);
65868348SEric.Yu@Sun.COM 
65878348SEric.Yu@Sun.COM 		/*
65888348SEric.Yu@Sun.COM 		 * Destination is a native IPv6 address.
65898348SEric.Yu@Sun.COM 		 * Send out an IPv6 format packet.
65908348SEric.Yu@Sun.COM 		 */
65918348SEric.Yu@Sun.COM 
65928348SEric.Yu@Sun.COM 		error = raw_ip_send_data_v6(connp->conn_wq, connp, mp, sin6,
65938348SEric.Yu@Sun.COM 		    ipp);
65948348SEric.Yu@Sun.COM 	}
65958348SEric.Yu@Sun.COM 		break;
65968348SEric.Yu@Sun.COM 	case AF_INET: {
65978348SEric.Yu@Sun.COM 		sin_t	*sin;
65988348SEric.Yu@Sun.COM 		ip4_pkt_t pktinfo;
65998348SEric.Yu@Sun.COM 		ip4_pkt_t *pktinfop = &pktinfo;
66008348SEric.Yu@Sun.COM 		ipaddr_t	v4dst;
66018348SEric.Yu@Sun.COM 
66028348SEric.Yu@Sun.COM 		sin = (sin_t *)msg->msg_name;
66038348SEric.Yu@Sun.COM 		if (sin != NULL) {
66048348SEric.Yu@Sun.COM 			error = proto_verify_ip_addr(icmp->icmp_family,
66058348SEric.Yu@Sun.COM 			    (struct sockaddr *)msg->msg_name, msg->msg_namelen);
66068348SEric.Yu@Sun.COM 			if (error != 0) {
66078348SEric.Yu@Sun.COM 				bypass_dgram_errind = B_TRUE;
66088348SEric.Yu@Sun.COM 				goto done_lock;
66098348SEric.Yu@Sun.COM 			}
66108348SEric.Yu@Sun.COM 			v4dst = sin->sin_addr.s_addr;
66118348SEric.Yu@Sun.COM 			if (icmp->icmp_delayed_error != 0) {
66128348SEric.Yu@Sun.COM 				sin_t *sin1 = (sin_t *)msg->msg_name;
66138348SEric.Yu@Sun.COM 				sin_t *sin2 = (sin_t *)&icmp->icmp_delayed_addr;
66148348SEric.Yu@Sun.COM 
66158348SEric.Yu@Sun.COM 				error = icmp->icmp_delayed_error;
66168348SEric.Yu@Sun.COM 				icmp->icmp_delayed_error = 0;
66178348SEric.Yu@Sun.COM 
66188348SEric.Yu@Sun.COM 				/* Compare IP address and port */
66198348SEric.Yu@Sun.COM 				if (sin1->sin_port == sin2->sin_port &&
66208348SEric.Yu@Sun.COM 				    sin1->sin_addr.s_addr ==
66218348SEric.Yu@Sun.COM 				    sin2->sin_addr.s_addr) {
66228348SEric.Yu@Sun.COM 					goto done_lock;
66238348SEric.Yu@Sun.COM 				}
66248348SEric.Yu@Sun.COM 
66258348SEric.Yu@Sun.COM 			}
66268348SEric.Yu@Sun.COM 		} else {
66278348SEric.Yu@Sun.COM 			/*
66288348SEric.Yu@Sun.COM 			 * Use connected address
66298348SEric.Yu@Sun.COM 			 */
66308348SEric.Yu@Sun.COM 			if (icmp->icmp_state != TS_DATA_XFER) {
66318348SEric.Yu@Sun.COM 				BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
66328348SEric.Yu@Sun.COM 				error = EDESTADDRREQ;
66338348SEric.Yu@Sun.COM 				bypass_dgram_errind = B_TRUE;
66348348SEric.Yu@Sun.COM 				goto done_lock;
66358348SEric.Yu@Sun.COM 			}
66368348SEric.Yu@Sun.COM 			v4dst = V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr);
66378348SEric.Yu@Sun.COM 		}
66388348SEric.Yu@Sun.COM 
66398348SEric.Yu@Sun.COM 
66408348SEric.Yu@Sun.COM 		pktinfop->ip4_ill_index = 0;
66418348SEric.Yu@Sun.COM 		pktinfop->ip4_addr = INADDR_ANY;
66428348SEric.Yu@Sun.COM 
66438348SEric.Yu@Sun.COM 		/*
66448348SEric.Yu@Sun.COM 		 * If options passed in, feed it for verification and handling
66458348SEric.Yu@Sun.COM 		 */
66468348SEric.Yu@Sun.COM 		if (msg->msg_controllen != 0) {
66478348SEric.Yu@Sun.COM 			error = process_auxiliary_options(connp,
66488348SEric.Yu@Sun.COM 			    msg->msg_control, msg->msg_controllen,
6649*8778SErik.Nordmark@Sun.COM 			    pktinfop, &icmp_opt_obj, icmp_opt_set, cr);
66508348SEric.Yu@Sun.COM 			if (error != 0) {
66518348SEric.Yu@Sun.COM 				goto done_lock;
66528348SEric.Yu@Sun.COM 			}
66538348SEric.Yu@Sun.COM 		}
66548348SEric.Yu@Sun.COM 		rw_exit(&icmp->icmp_rwlock);
66558348SEric.Yu@Sun.COM 
66568348SEric.Yu@Sun.COM 		error = raw_ip_send_data_v4(connp->conn_wq, connp, mp,
66578348SEric.Yu@Sun.COM 		    v4dst, pktinfop);
66588348SEric.Yu@Sun.COM 		break;
66598348SEric.Yu@Sun.COM 	}
66608348SEric.Yu@Sun.COM 
66618348SEric.Yu@Sun.COM 	default:
66628348SEric.Yu@Sun.COM 		ASSERT(0);
66638348SEric.Yu@Sun.COM 	}
66648348SEric.Yu@Sun.COM 
66658348SEric.Yu@Sun.COM 	goto done;
66668348SEric.Yu@Sun.COM 
66678348SEric.Yu@Sun.COM done_lock:
66688348SEric.Yu@Sun.COM 	rw_exit(&icmp->icmp_rwlock);
66698348SEric.Yu@Sun.COM 	if (error != 0) {
66708348SEric.Yu@Sun.COM 		ASSERT(mp != NULL);
66718348SEric.Yu@Sun.COM 		freemsg(mp);
66728348SEric.Yu@Sun.COM 	}
66738348SEric.Yu@Sun.COM done:
66748348SEric.Yu@Sun.COM 	if (bypass_dgram_errind)
66758348SEric.Yu@Sun.COM 		return (error);
66768348SEric.Yu@Sun.COM 	return (icmp->icmp_dgram_errind ? error : 0);
66778348SEric.Yu@Sun.COM }
66788348SEric.Yu@Sun.COM 
/*
 * Downcall vector for raw IP sockets: the rawip_* entry points of this
 * file gathered into a single sock_downcalls_t.
 *
 * The initializer is positional, so every entry must stay in the same
 * order as the members declared in sock_downcalls_t.  Inserting or
 * removing a line silently shifts all following entries into the wrong
 * slot.
 *
 * NOTE(review): presumably this table is handed to the socket layer
 * when a raw IP socket is created - confirm against the code that
 * references sock_rawip_downcalls.
 */
66798348SEric.Yu@Sun.COM sock_downcalls_t sock_rawip_downcalls = {
66808348SEric.Yu@Sun.COM 	rawip_activate,
66818348SEric.Yu@Sun.COM 	rawip_accept,
66828348SEric.Yu@Sun.COM 	rawip_bind,
66838348SEric.Yu@Sun.COM 	rawip_listen,
66848348SEric.Yu@Sun.COM 	rawip_connect,
66858348SEric.Yu@Sun.COM 	rawip_getpeername,
66868348SEric.Yu@Sun.COM 	rawip_getsockname,
66878348SEric.Yu@Sun.COM 	rawip_getsockopt,
66888348SEric.Yu@Sun.COM 	rawip_setsockopt,
66898348SEric.Yu@Sun.COM 	rawip_send,
	/*
	 * The next three slots are left NULL: no rawip_* routine is
	 * supplied for them.
	 * NOTE(review): check the sock_downcalls_t declaration for which
	 * operations these slots correspond to.
	 */
66908348SEric.Yu@Sun.COM 	NULL,
66918348SEric.Yu@Sun.COM 	NULL,
66928348SEric.Yu@Sun.COM 	NULL,
66938348SEric.Yu@Sun.COM 	rawip_shutdown,
66948348SEric.Yu@Sun.COM 	rawip_clr_flowctrl,
66958348SEric.Yu@Sun.COM 	rawip_ioctl,
66968348SEric.Yu@Sun.COM 	rawip_close
66978348SEric.Yu@Sun.COM };
6698