xref: /onnv-gate/usr/src/uts/common/inet/ip/ipclassifier.c (revision 8485:633e5b5eb268)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51503Sericheng  * Common Development and Distribution License (the "License").
61503Sericheng  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
22*8485SPeter.Memishian@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate /*
270Sstevel@tonic-gate  * IP PACKET CLASSIFIER
280Sstevel@tonic-gate  *
290Sstevel@tonic-gate  * The IP packet classifier provides mapping between IP packets and persistent
300Sstevel@tonic-gate  * connection state for connection-oriented protocols. It also provides
310Sstevel@tonic-gate  * interface for managing connection states.
320Sstevel@tonic-gate  *
330Sstevel@tonic-gate  * The connection state is kept in conn_t data structure and contains, among
340Sstevel@tonic-gate  * other things:
350Sstevel@tonic-gate  *
360Sstevel@tonic-gate  *	o local/remote address and ports
370Sstevel@tonic-gate  *	o Transport protocol
380Sstevel@tonic-gate  *	o squeue for the connection (for TCP only)
390Sstevel@tonic-gate  *	o reference counter
400Sstevel@tonic-gate  *	o Connection state
410Sstevel@tonic-gate  *	o hash table linkage
420Sstevel@tonic-gate  *	o interface/ire information
430Sstevel@tonic-gate  *	o credentials
440Sstevel@tonic-gate  *	o ipsec policy
450Sstevel@tonic-gate  *	o send and receive functions.
460Sstevel@tonic-gate  *	o mutex lock.
470Sstevel@tonic-gate  *
480Sstevel@tonic-gate  * Connections use a reference counting scheme. They are freed when the
490Sstevel@tonic-gate  * reference counter drops to zero. A reference is incremented when connection
500Sstevel@tonic-gate  * is placed in a list or table, when incoming packet for the connection arrives
510Sstevel@tonic-gate  * and when connection is processed via squeue (squeue processing may be
520Sstevel@tonic-gate  * asynchronous and the reference protects the connection from being destroyed
530Sstevel@tonic-gate  * before its processing is finished).
540Sstevel@tonic-gate  *
550Sstevel@tonic-gate  * send and receive functions are currently used for TCP only. The send function
560Sstevel@tonic-gate  * determines the IP entry point for the packet once it leaves TCP to be sent to
570Sstevel@tonic-gate  * the destination address. The receive function is used by IP when the packet
580Sstevel@tonic-gate  * should be passed for TCP processing. When a new connection is created these
590Sstevel@tonic-gate  * are set to ip_output() and tcp_input() respectively. During the lifetime of
600Sstevel@tonic-gate  * the connection the send and receive functions may change depending on the
610Sstevel@tonic-gate  * changes in the connection state. For example, once the connection is bound to
620Sstevel@tonic-gate  * an address, the receive function for this connection is set to
630Sstevel@tonic-gate  * tcp_conn_request().  This allows incoming SYNs to go directly into the
640Sstevel@tonic-gate  * listener SYN processing function without going to tcp_input() first.
650Sstevel@tonic-gate  *
660Sstevel@tonic-gate  * Classifier uses several hash tables:
670Sstevel@tonic-gate  *
680Sstevel@tonic-gate  * 	ipcl_conn_fanout:	contains all TCP connections in CONNECTED state
690Sstevel@tonic-gate  *	ipcl_bind_fanout:	contains all connections in BOUND state
700Sstevel@tonic-gate  *	ipcl_proto_fanout:	IPv4 protocol fanout
710Sstevel@tonic-gate  *	ipcl_proto_fanout_v6:	IPv6 protocol fanout
720Sstevel@tonic-gate  *	ipcl_udp_fanout:	contains all UDP connections
730Sstevel@tonic-gate  *	ipcl_globalhash_fanout:	contains all connections
740Sstevel@tonic-gate  *
750Sstevel@tonic-gate  * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
760Sstevel@tonic-gate  * which need to view all existing connections.
770Sstevel@tonic-gate  *
780Sstevel@tonic-gate  * All tables are protected by per-bucket locks. When both per-bucket lock and
790Sstevel@tonic-gate  * connection lock need to be held, the per-bucket lock should be acquired
800Sstevel@tonic-gate  * first, followed by the connection lock.
810Sstevel@tonic-gate  *
820Sstevel@tonic-gate  * All functions doing search in one of these tables increment a reference
830Sstevel@tonic-gate  * counter on the connection found (if any). This reference should be dropped
840Sstevel@tonic-gate  * when the caller has finished processing the connection.
850Sstevel@tonic-gate  *
860Sstevel@tonic-gate  *
870Sstevel@tonic-gate  * INTERFACES:
880Sstevel@tonic-gate  * ===========
890Sstevel@tonic-gate  *
900Sstevel@tonic-gate  * Connection Lookup:
910Sstevel@tonic-gate  * ------------------
920Sstevel@tonic-gate  *
933448Sdh155122  * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, zoneid, ip_stack)
943448Sdh155122  * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, zoneid, ip_stack)
950Sstevel@tonic-gate  *
960Sstevel@tonic-gate  * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
970Sstevel@tonic-gate  * it can't find any associated connection. If the connection is found, its
980Sstevel@tonic-gate  * reference counter is incremented.
990Sstevel@tonic-gate  *
1000Sstevel@tonic-gate  *	mp:	mblock, containing packet header. The full header should fit
1010Sstevel@tonic-gate  *		into a single mblock. It should also contain at least full IP
1020Sstevel@tonic-gate  *		and TCP or UDP header.
1030Sstevel@tonic-gate  *
1040Sstevel@tonic-gate  *	protocol: Either IPPROTO_TCP or IPPROTO_UDP.
1050Sstevel@tonic-gate  *
1060Sstevel@tonic-gate  *	hdr_len: The size of IP header. It is used to find TCP or UDP header in
1070Sstevel@tonic-gate  *		 the packet.
1080Sstevel@tonic-gate  *
1091676Sjpk  * 	zoneid: The zone in which the returned connection must be; the zoneid
1101676Sjpk  *		corresponding to the ire_zoneid on the IRE located for the
1111676Sjpk  *		packet's destination address.
1120Sstevel@tonic-gate  *
1130Sstevel@tonic-gate  *	For TCP connections, the lookup order is as follows:
1140Sstevel@tonic-gate  *		5-tuple {src, dst, protocol, local port, remote port}
1150Sstevel@tonic-gate  *			lookup in ipcl_conn_fanout table.
1160Sstevel@tonic-gate  *		3-tuple {dst, remote port, protocol} lookup in
1170Sstevel@tonic-gate  *			ipcl_bind_fanout table.
1180Sstevel@tonic-gate  *
1190Sstevel@tonic-gate  *	For UDP connections, a 5-tuple {src, dst, protocol, local port,
1200Sstevel@tonic-gate  *	remote port} lookup is done on ipcl_udp_fanout. Note that,
1210Sstevel@tonic-gate  *	these interfaces do not handle cases where a packet belongs
1220Sstevel@tonic-gate  *	to multiple UDP clients, which is handled in IP itself.
1230Sstevel@tonic-gate  *
1241676Sjpk  * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
1251676Sjpk  * determine which actual zone gets the segment.  This is used only in a
1261676Sjpk  * labeled environment.  The matching rules are:
1271676Sjpk  *
1281676Sjpk  *	- If it's not a multilevel port, then the label on the packet selects
1291676Sjpk  *	  the zone.  Unlabeled packets are delivered to the global zone.
1301676Sjpk  *
1311676Sjpk  *	- If it's a multilevel port, then only the zone registered to receive
1321676Sjpk  *	  packets on that port matches.
1331676Sjpk  *
1341676Sjpk  * Also, in a labeled environment, packet labels need to be checked.  For fully
1351676Sjpk  * bound TCP connections, we can assume that the packet label was checked
1361676Sjpk  * during connection establishment, and doesn't need to be checked on each
1371676Sjpk  * packet.  For others, though, we need to check for strict equality or, for
1381676Sjpk  * multilevel ports, membership in the range or set.  This part currently does
1391676Sjpk  * a tnrh lookup on each packet, but could be optimized to use cached results
1401676Sjpk  * if that were necessary.  (SCTP doesn't come through here, but if it did,
1411676Sjpk  * we would apply the same rules as TCP.)
1421676Sjpk  *
1431676Sjpk  * An implication of the above is that fully-bound TCP sockets must always use
1441676Sjpk  * distinct 4-tuples; they can't be discriminated by label alone.
1451676Sjpk  *
1461676Sjpk  * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
1471676Sjpk  * as there's no connection set-up handshake and no shared state.
1481676Sjpk  *
1491676Sjpk  * Labels on looped-back packets within a single zone do not need to be
1501676Sjpk  * checked, as all processes in the same zone have the same label.
1511676Sjpk  *
1521676Sjpk  * Finally, for unlabeled packets received by a labeled system, special rules
1531676Sjpk  * apply.  We consider only the MLP if there is one.  Otherwise, we prefer a
1541676Sjpk  * socket in the zone whose label matches the default label of the sender, if
1551676Sjpk  * any.  In any event, the receiving socket must have SO_MAC_EXEMPT set and the
1561676Sjpk  * receiver's label must dominate the sender's default label.
1571676Sjpk  *
1583448Sdh155122  * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcph_t *, int, ip_stack);
1593448Sdh155122  * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
1603448Sdh155122  *					 ip_stack);
1610Sstevel@tonic-gate  *
1620Sstevel@tonic-gate  *	Lookup routine to find an exact match for {src, dst, local port,
1630Sstevel@tonic-gate  *	remote port} for TCP connections in ipcl_conn_fanout. The address and
1640Sstevel@tonic-gate  *	ports are read from the IP and TCP header respectively.
1650Sstevel@tonic-gate  *
1663448Sdh155122  * conn_t	*ipcl_lookup_listener_v4(lport, laddr, protocol,
1673448Sdh155122  *					 zoneid, ip_stack);
1683448Sdh155122  * conn_t	*ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
1693448Sdh155122  *					 zoneid, ip_stack);
1700Sstevel@tonic-gate  *
1710Sstevel@tonic-gate  * 	Lookup routine to find a listener with the tuple {lport, laddr,
1720Sstevel@tonic-gate  * 	protocol} in the ipcl_bind_fanout table. For IPv6, an additional
1730Sstevel@tonic-gate  * 	parameter interface index is also compared.
1740Sstevel@tonic-gate  *
1753448Sdh155122  * void ipcl_walk(func, arg, ip_stack)
1760Sstevel@tonic-gate  *
1770Sstevel@tonic-gate  * 	Apply 'func' to every connection available. The 'func' is called as
1780Sstevel@tonic-gate  *	(*func)(connp, arg). The walk is non-atomic so connections may be
1790Sstevel@tonic-gate  *	created and destroyed during the walk. The CONN_CONDEMNED and
1800Sstevel@tonic-gate  *	CONN_INCIPIENT flags ensure that connections which are newly created
1810Sstevel@tonic-gate  *	or being destroyed are not selected by the walker.
1820Sstevel@tonic-gate  *
1830Sstevel@tonic-gate  * Table Updates
1840Sstevel@tonic-gate  * -------------
1850Sstevel@tonic-gate  *
1860Sstevel@tonic-gate  * int ipcl_conn_insert(connp, protocol, src, dst, ports)
1870Sstevel@tonic-gate  * int ipcl_conn_insert_v6(connp, protocol, src, dst, ports, ifindex)
1880Sstevel@tonic-gate  *
1890Sstevel@tonic-gate  *	Insert 'connp' in the ipcl_conn_fanout.
1900Sstevel@tonic-gate  *	Arguments :
1910Sstevel@tonic-gate  *		connp		conn_t to be inserted
1920Sstevel@tonic-gate  *		protocol	connection protocol
1930Sstevel@tonic-gate  *		src		source address
1940Sstevel@tonic-gate  *		dst		destination address
1950Sstevel@tonic-gate  *		ports		local and remote port
1960Sstevel@tonic-gate  *		ifindex		interface index for IPv6 connections
1970Sstevel@tonic-gate  *
1980Sstevel@tonic-gate  *	Return value :
1990Sstevel@tonic-gate  *		0		if connp was inserted
2000Sstevel@tonic-gate  *		EADDRINUSE	if the connection with the same tuple
2010Sstevel@tonic-gate  *				already exists.
2020Sstevel@tonic-gate  *
2030Sstevel@tonic-gate  * int ipcl_bind_insert(connp, protocol, src, lport);
2040Sstevel@tonic-gate  * int ipcl_bind_insert_v6(connp, protocol, src, lport);
2050Sstevel@tonic-gate  *
2060Sstevel@tonic-gate  * 	Insert 'connp' in ipcl_bind_fanout.
2070Sstevel@tonic-gate  * 	Arguments :
2080Sstevel@tonic-gate  * 		connp		conn_t to be inserted
2090Sstevel@tonic-gate  * 		protocol	connection protocol
2100Sstevel@tonic-gate  * 		src		source address connection wants
2110Sstevel@tonic-gate  * 				to bind to
2120Sstevel@tonic-gate  * 		lport		local port connection wants to
2130Sstevel@tonic-gate  * 				bind to
2140Sstevel@tonic-gate  *
2150Sstevel@tonic-gate  *
2160Sstevel@tonic-gate  * void ipcl_hash_remove(connp);
2170Sstevel@tonic-gate  *
2180Sstevel@tonic-gate  * 	Removes the 'connp' from the connection fanout table.
2190Sstevel@tonic-gate  *
2200Sstevel@tonic-gate  * Connection Creation/Destruction
2210Sstevel@tonic-gate  * -------------------------------
2220Sstevel@tonic-gate  *
2233448Sdh155122  * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
2240Sstevel@tonic-gate  *
2250Sstevel@tonic-gate  * 	Creates a new conn based on the type flag, inserts it into
2260Sstevel@tonic-gate  * 	globalhash table.
2270Sstevel@tonic-gate  *
2280Sstevel@tonic-gate  *	type:	This flag determines the type of conn_t which needs to be
2295240Snordmark  *		created i.e., which kmem_cache it comes from.
2300Sstevel@tonic-gate  *		IPCL_TCPCONN	indicates a TCP connection
2315240Snordmark  *		IPCL_SCTPCONN	indicates a SCTP connection
2325240Snordmark  *		IPCL_UDPCONN	indicates a UDP conn_t.
2335240Snordmark  *		IPCL_RAWIPCONN	indicates a RAWIP/ICMP conn_t.
2345240Snordmark  *		IPCL_RTSCONN	indicates a RTS conn_t.
2355240Snordmark  *		IPCL_IPCCONN	indicates all other connections.
2360Sstevel@tonic-gate  *
2370Sstevel@tonic-gate  * void ipcl_conn_destroy(connp)
2380Sstevel@tonic-gate  *
2390Sstevel@tonic-gate  * 	Destroys the connection state, removes it from the global
2400Sstevel@tonic-gate  * 	connection hash table and frees its memory.
2410Sstevel@tonic-gate  */
2420Sstevel@tonic-gate 
2430Sstevel@tonic-gate #include <sys/types.h>
2440Sstevel@tonic-gate #include <sys/stream.h>
2450Sstevel@tonic-gate #include <sys/stropts.h>
2460Sstevel@tonic-gate #include <sys/sysmacros.h>
2470Sstevel@tonic-gate #include <sys/strsubr.h>
2480Sstevel@tonic-gate #include <sys/strsun.h>
2490Sstevel@tonic-gate #define	_SUN_TPI_VERSION 2
2500Sstevel@tonic-gate #include <sys/ddi.h>
2510Sstevel@tonic-gate #include <sys/cmn_err.h>
2520Sstevel@tonic-gate #include <sys/debug.h>
2530Sstevel@tonic-gate 
2540Sstevel@tonic-gate #include <sys/systm.h>
2550Sstevel@tonic-gate #include <sys/param.h>
2560Sstevel@tonic-gate #include <sys/kmem.h>
2570Sstevel@tonic-gate #include <sys/isa_defs.h>
2580Sstevel@tonic-gate #include <inet/common.h>
2590Sstevel@tonic-gate #include <netinet/ip6.h>
2600Sstevel@tonic-gate #include <netinet/icmp6.h>
2610Sstevel@tonic-gate 
2620Sstevel@tonic-gate #include <inet/ip.h>
2630Sstevel@tonic-gate #include <inet/ip6.h>
2640Sstevel@tonic-gate #include <inet/ip_ndp.h>
2658348SEric.Yu@Sun.COM #include <inet/ip_impl.h>
266741Smasputra #include <inet/udp_impl.h>
2670Sstevel@tonic-gate #include <inet/sctp_ip.h>
2683448Sdh155122 #include <inet/sctp/sctp_impl.h>
2695240Snordmark #include <inet/rawip_impl.h>
2705240Snordmark #include <inet/rts_impl.h>
2710Sstevel@tonic-gate 
2720Sstevel@tonic-gate #include <sys/cpuvar.h>
2730Sstevel@tonic-gate 
2740Sstevel@tonic-gate #include <inet/ipclassifier.h>
2758348SEric.Yu@Sun.COM #include <inet/tcp.h>
2760Sstevel@tonic-gate #include <inet/ipsec_impl.h>
2770Sstevel@tonic-gate 
2781676Sjpk #include <sys/tsol/tnet.h>
2798348SEric.Yu@Sun.COM #include <sys/sockio.h>
2801676Sjpk 
/*
 * Classifier debug tracing is compiled in only when DEBUG is defined.
 */
#ifdef DEBUG
#define	IPCL_DEBUG
#else
#undef	IPCL_DEBUG
#endif

/*
 * IPCL_DEBUG_LVL(level, args)
 *
 * With IPCL_DEBUG defined: calls printf with 'args' (a parenthesized
 * printf argument list) when any bit of 'level' is set in
 * ipcl_debug_level.  Without IPCL_DEBUG: expands to an empty block and
 * 'args' is never evaluated.
 *
 * 'level' is parenthesized in the expansion so that a compound mask such
 * as (A | B) is tested as a whole; unparenthesized, '&' would bind tighter
 * than '|' and the test would be true whenever B is non-zero.
 */
#ifdef	IPCL_DEBUG
int	ipcl_debug_level = 0;
#define	IPCL_DEBUG_LVL(level, args)	\
	if (ipcl_debug_level & (level)) { printf args; }
#else
#define	IPCL_DEBUG_LVL(level, args) {; }
#endif
2943448Sdh155122 /* Old value for compatibility. Setable in /etc/system */
2950Sstevel@tonic-gate uint_t tcp_conn_hash_size = 0;
2960Sstevel@tonic-gate 
2973448Sdh155122 /* New value. Zero means choose automatically.  Setable in /etc/system */
2980Sstevel@tonic-gate uint_t ipcl_conn_hash_size = 0;
2990Sstevel@tonic-gate uint_t ipcl_conn_hash_memfactor = 8192;
3000Sstevel@tonic-gate uint_t ipcl_conn_hash_maxsize = 82500;
3010Sstevel@tonic-gate 
3020Sstevel@tonic-gate /* bind/udp fanout table size */
3030Sstevel@tonic-gate uint_t ipcl_bind_fanout_size = 512;
3041503Sericheng uint_t ipcl_udp_fanout_size = 16384;
3050Sstevel@tonic-gate 
3060Sstevel@tonic-gate /* Raw socket fanout size.  Must be a power of 2. */
3070Sstevel@tonic-gate uint_t ipcl_raw_fanout_size = 256;
3080Sstevel@tonic-gate 
/*
 * P2Ps() expands to an array initializer of primes useful as hash table
 * sizes.  Entry N (for N of 3-28) is the nearest prime <= 2^N - 2^(N-2).
 * Entries 0-2 and the final entry (index 29) are zero; ipcl_init() treats
 * a zero entry as "out of range".
 */

#define	P2Ps() {							\
	0, 0, 0, 5, 11, 23, 47, 89,					\
	191, 383, 761, 1531, 3067, 6143, 12281, 24571,			\
	49139, 98299, 196597, 393209,					\
	786431, 1572853, 3145721, 6291449,				\
	12582893, 25165813, 50331599, 100663291,			\
	201326557, 0							\
}
3180Sstevel@tonic-gate 
3190Sstevel@tonic-gate /*
3205240Snordmark  * wrapper structure to ensure that conn and what follows it (tcp_t, etc)
3215240Snordmark  * are aligned on cache lines.
3220Sstevel@tonic-gate  */
3235240Snordmark typedef union itc_s {
3245240Snordmark 	conn_t	itc_conn;
3255240Snordmark 	char	itcu_filler[CACHE_ALIGN(conn_s)];
3260Sstevel@tonic-gate } itc_t;
3270Sstevel@tonic-gate 
3285240Snordmark struct kmem_cache  *tcp_conn_cache;
3295240Snordmark struct kmem_cache  *ip_conn_cache;
3308348SEric.Yu@Sun.COM struct kmem_cache  *ip_helper_stream_cache;
3310Sstevel@tonic-gate extern struct kmem_cache  *sctp_conn_cache;
3320Sstevel@tonic-gate extern struct kmem_cache  *tcp_sack_info_cache;
3330Sstevel@tonic-gate extern struct kmem_cache  *tcp_iphc_cache;
3345240Snordmark struct kmem_cache  *udp_conn_cache;
3355240Snordmark struct kmem_cache  *rawip_conn_cache;
3365240Snordmark struct kmem_cache  *rts_conn_cache;
3370Sstevel@tonic-gate 
3380Sstevel@tonic-gate extern void	tcp_timermp_free(tcp_t *);
3390Sstevel@tonic-gate extern mblk_t	*tcp_timermp_alloc(int);
3400Sstevel@tonic-gate 
3415240Snordmark static int	ip_conn_constructor(void *, void *, int);
3425240Snordmark static void	ip_conn_destructor(void *, void *);
3435240Snordmark 
3445240Snordmark static int	tcp_conn_constructor(void *, void *, int);
3455240Snordmark static void	tcp_conn_destructor(void *, void *);
3465240Snordmark 
3475240Snordmark static int	udp_conn_constructor(void *, void *, int);
3485240Snordmark static void	udp_conn_destructor(void *, void *);
3495240Snordmark 
3505240Snordmark static int	rawip_conn_constructor(void *, void *, int);
3515240Snordmark static void	rawip_conn_destructor(void *, void *);
3525240Snordmark 
3535240Snordmark static int	rts_conn_constructor(void *, void *, int);
3545240Snordmark static void	rts_conn_destructor(void *, void *);
3550Sstevel@tonic-gate 
3568348SEric.Yu@Sun.COM static int	ip_helper_stream_constructor(void *, void *, int);
3578348SEric.Yu@Sun.COM static void	ip_helper_stream_destructor(void *, void *);
3588348SEric.Yu@Sun.COM 
3598348SEric.Yu@Sun.COM boolean_t	ip_use_helper_cache = B_TRUE;
3608348SEric.Yu@Sun.COM 
3618392SHuafeng.Lv@Sun.COM /*
3628392SHuafeng.Lv@Sun.COM  * Hook functions to enable cluster networking
3638392SHuafeng.Lv@Sun.COM  * On non-clustered systems these vectors must always be NULL.
3648392SHuafeng.Lv@Sun.COM  */
3658392SHuafeng.Lv@Sun.COM extern void	(*cl_inet_listen)(netstackid_t, uint8_t, sa_family_t,
3668392SHuafeng.Lv@Sun.COM 		    uint8_t *, in_port_t, void *);
3678392SHuafeng.Lv@Sun.COM extern void	(*cl_inet_unlisten)(netstackid_t, uint8_t, sa_family_t,
3688392SHuafeng.Lv@Sun.COM 		    uint8_t *, in_port_t, void *);
3698392SHuafeng.Lv@Sun.COM 
#ifdef	IPCL_DEBUG
#define	INET_NTOA_BUFSIZE	18

/*
 * Format the four bytes of 'in', taken in memory order, as a dotted
 * decimal string in the caller-supplied buffer 'b' and return 'b'.
 * The longest possible result ("255.255.255.255" plus the terminating
 * NUL) is 16 bytes, which fits in INET_NTOA_BUFSIZE.
 */
static char *
inet_ntoa_r(uint32_t in, char *b)
{
	unsigned char	*octet = (unsigned char *)&in;

	(void) sprintf(b, "%d.%d.%d.%d",
	    octet[0], octet[1], octet[2], octet[3]);
	return (b);
}
#endif
3830Sstevel@tonic-gate 
3840Sstevel@tonic-gate /*
3853448Sdh155122  * Global (for all stack instances) init routine
3860Sstevel@tonic-gate  */
3870Sstevel@tonic-gate void
3883448Sdh155122 ipcl_g_init(void)
3890Sstevel@tonic-gate {
3905240Snordmark 	ip_conn_cache = kmem_cache_create("ip_conn_cache",
3910Sstevel@tonic-gate 	    sizeof (conn_t), CACHE_ALIGN_SIZE,
3925240Snordmark 	    ip_conn_constructor, ip_conn_destructor,
3935240Snordmark 	    NULL, NULL, NULL, 0);
3945240Snordmark 
3955240Snordmark 	tcp_conn_cache = kmem_cache_create("tcp_conn_cache",
3965240Snordmark 	    sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE,
3975240Snordmark 	    tcp_conn_constructor, tcp_conn_destructor,
3985240Snordmark 	    NULL, NULL, NULL, 0);
3990Sstevel@tonic-gate 
4005240Snordmark 	udp_conn_cache = kmem_cache_create("udp_conn_cache",
4015240Snordmark 	    sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE,
4025240Snordmark 	    udp_conn_constructor, udp_conn_destructor,
4035240Snordmark 	    NULL, NULL, NULL, 0);
4045240Snordmark 
4055240Snordmark 	rawip_conn_cache = kmem_cache_create("rawip_conn_cache",
4065240Snordmark 	    sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE,
4075240Snordmark 	    rawip_conn_constructor, rawip_conn_destructor,
4085240Snordmark 	    NULL, NULL, NULL, 0);
4095240Snordmark 
4105240Snordmark 	rts_conn_cache = kmem_cache_create("rts_conn_cache",
4115240Snordmark 	    sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE,
4125240Snordmark 	    rts_conn_constructor, rts_conn_destructor,
4130Sstevel@tonic-gate 	    NULL, NULL, NULL, 0);
4148348SEric.Yu@Sun.COM 
4158348SEric.Yu@Sun.COM 	if (ip_use_helper_cache) {
4168348SEric.Yu@Sun.COM 		ip_helper_stream_cache = kmem_cache_create
4178348SEric.Yu@Sun.COM 		    ("ip_helper_stream_cache", sizeof (ip_helper_stream_info_t),
4188348SEric.Yu@Sun.COM 		    CACHE_ALIGN_SIZE, ip_helper_stream_constructor,
4198348SEric.Yu@Sun.COM 		    ip_helper_stream_destructor, NULL, NULL, NULL, 0);
4208348SEric.Yu@Sun.COM 	} else {
4218348SEric.Yu@Sun.COM 		ip_helper_stream_cache = NULL;
4228348SEric.Yu@Sun.COM 	}
4233448Sdh155122 }
4243448Sdh155122 
4253448Sdh155122 /*
4263448Sdh155122  * ipclassifier intialization routine, sets up hash tables.
4273448Sdh155122  */
4283448Sdh155122 void
4293448Sdh155122 ipcl_init(ip_stack_t *ipst)
4303448Sdh155122 {
4313448Sdh155122 	int i;
4323448Sdh155122 	int sizes[] = P2Ps();
4330Sstevel@tonic-gate 
4340Sstevel@tonic-gate 	/*
4353448Sdh155122 	 * Calculate size of conn fanout table from /etc/system settings
4360Sstevel@tonic-gate 	 */
4370Sstevel@tonic-gate 	if (ipcl_conn_hash_size != 0) {
4383448Sdh155122 		ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size;
4390Sstevel@tonic-gate 	} else if (tcp_conn_hash_size != 0) {
4403448Sdh155122 		ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size;
4410Sstevel@tonic-gate 	} else {
4420Sstevel@tonic-gate 		extern pgcnt_t freemem;
4430Sstevel@tonic-gate 
4443448Sdh155122 		ipst->ips_ipcl_conn_fanout_size =
4450Sstevel@tonic-gate 		    (freemem * PAGESIZE) / ipcl_conn_hash_memfactor;
4460Sstevel@tonic-gate 
4473448Sdh155122 		if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) {
4483448Sdh155122 			ipst->ips_ipcl_conn_fanout_size =
4493448Sdh155122 			    ipcl_conn_hash_maxsize;
4503448Sdh155122 		}
4510Sstevel@tonic-gate 	}
4520Sstevel@tonic-gate 
4530Sstevel@tonic-gate 	for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) {
4543448Sdh155122 		if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) {
4550Sstevel@tonic-gate 			break;
4560Sstevel@tonic-gate 		}
4570Sstevel@tonic-gate 	}
4583448Sdh155122 	if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) {
4590Sstevel@tonic-gate 		/* Out of range, use the 2^16 value */
4603448Sdh155122 		ipst->ips_ipcl_conn_fanout_size = sizes[16];
4610Sstevel@tonic-gate 	}
4623448Sdh155122 
4633448Sdh155122 	/* Take values from /etc/system */
4643448Sdh155122 	ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size;
4653448Sdh155122 	ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size;
4663448Sdh155122 	ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size;
4670Sstevel@tonic-gate 
4683448Sdh155122 	ASSERT(ipst->ips_ipcl_conn_fanout == NULL);
4693448Sdh155122 
4703448Sdh155122 	ipst->ips_ipcl_conn_fanout = kmem_zalloc(
4713448Sdh155122 	    ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP);
4723448Sdh155122 
4733448Sdh155122 	for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
4743448Sdh155122 		mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL,
4750Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4760Sstevel@tonic-gate 	}
4770Sstevel@tonic-gate 
4783448Sdh155122 	ipst->ips_ipcl_bind_fanout = kmem_zalloc(
4793448Sdh155122 	    ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP);
4800Sstevel@tonic-gate 
4813448Sdh155122 	for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
4823448Sdh155122 		mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL,
4830Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4840Sstevel@tonic-gate 	}
4850Sstevel@tonic-gate 
4863448Sdh155122 	ipst->ips_ipcl_proto_fanout = kmem_zalloc(IPPROTO_MAX *
4873448Sdh155122 	    sizeof (connf_t), KM_SLEEP);
4883448Sdh155122 	for (i = 0; i < IPPROTO_MAX; i++) {
4893448Sdh155122 		mutex_init(&ipst->ips_ipcl_proto_fanout[i].connf_lock, NULL,
4900Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4910Sstevel@tonic-gate 	}
4923448Sdh155122 
4933448Sdh155122 	ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX *
4943448Sdh155122 	    sizeof (connf_t), KM_SLEEP);
4953448Sdh155122 	for (i = 0; i < IPPROTO_MAX; i++) {
4963448Sdh155122 		mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL,
4970Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4980Sstevel@tonic-gate 	}
4990Sstevel@tonic-gate 
5003448Sdh155122 	ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP);
5013448Sdh155122 	mutex_init(&ipst->ips_rts_clients->connf_lock,
5023448Sdh155122 	    NULL, MUTEX_DEFAULT, NULL);
5030Sstevel@tonic-gate 
5043448Sdh155122 	ipst->ips_ipcl_udp_fanout = kmem_zalloc(
5053448Sdh155122 	    ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP);
5063448Sdh155122 	for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
5073448Sdh155122 		mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL,
5080Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
5090Sstevel@tonic-gate 	}
5100Sstevel@tonic-gate 
5113448Sdh155122 	ipst->ips_ipcl_raw_fanout = kmem_zalloc(
5123448Sdh155122 	    ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP);
5133448Sdh155122 	for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
5143448Sdh155122 		mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL,
5150Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
5160Sstevel@tonic-gate 	}
5170Sstevel@tonic-gate 
5183448Sdh155122 	ipst->ips_ipcl_globalhash_fanout = kmem_zalloc(
5193448Sdh155122 	    sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP);
5200Sstevel@tonic-gate 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
5213448Sdh155122 		mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock,
5223448Sdh155122 		    NULL, MUTEX_DEFAULT, NULL);
5230Sstevel@tonic-gate 	}
5240Sstevel@tonic-gate }
5250Sstevel@tonic-gate 
5260Sstevel@tonic-gate void
5273448Sdh155122 ipcl_g_destroy(void)
5280Sstevel@tonic-gate {
5295240Snordmark 	kmem_cache_destroy(ip_conn_cache);
5305240Snordmark 	kmem_cache_destroy(tcp_conn_cache);
5315240Snordmark 	kmem_cache_destroy(udp_conn_cache);
5325240Snordmark 	kmem_cache_destroy(rawip_conn_cache);
5335240Snordmark 	kmem_cache_destroy(rts_conn_cache);
5343448Sdh155122 }
5353448Sdh155122 
5363448Sdh155122 /*
5373448Sdh155122  * All user-level and kernel use of the stack must be gone
5383448Sdh155122  * by now.
5393448Sdh155122  */
5403448Sdh155122 void
5413448Sdh155122 ipcl_destroy(ip_stack_t *ipst)
5423448Sdh155122 {
5433448Sdh155122 	int i;
5443448Sdh155122 
5453448Sdh155122 	for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
5463448Sdh155122 		ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL);
5473448Sdh155122 		mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock);
5483448Sdh155122 	}
5493448Sdh155122 	kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size *
5503448Sdh155122 	    sizeof (connf_t));
5513448Sdh155122 	ipst->ips_ipcl_conn_fanout = NULL;
5523448Sdh155122 
5533448Sdh155122 	for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
5543448Sdh155122 		ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL);
5553448Sdh155122 		mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock);
5563448Sdh155122 	}
5573448Sdh155122 	kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size *
5583448Sdh155122 	    sizeof (connf_t));
5593448Sdh155122 	ipst->ips_ipcl_bind_fanout = NULL;
5603448Sdh155122 
5613448Sdh155122 	for (i = 0; i < IPPROTO_MAX; i++) {
5623448Sdh155122 		ASSERT(ipst->ips_ipcl_proto_fanout[i].connf_head == NULL);
5633448Sdh155122 		mutex_destroy(&ipst->ips_ipcl_proto_fanout[i].connf_lock);
5643448Sdh155122 	}
5653448Sdh155122 	kmem_free(ipst->ips_ipcl_proto_fanout, IPPROTO_MAX * sizeof (connf_t));
5663448Sdh155122 	ipst->ips_ipcl_proto_fanout = NULL;
5670Sstevel@tonic-gate 
5683448Sdh155122 	for (i = 0; i < IPPROTO_MAX; i++) {
5693448Sdh155122 		ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL);
5703448Sdh155122 		mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock);
5713448Sdh155122 	}
5723448Sdh155122 	kmem_free(ipst->ips_ipcl_proto_fanout_v6,
5733448Sdh155122 	    IPPROTO_MAX * sizeof (connf_t));
5743448Sdh155122 	ipst->ips_ipcl_proto_fanout_v6 = NULL;
5753448Sdh155122 
5763448Sdh155122 	for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
5773448Sdh155122 		ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL);
5783448Sdh155122 		mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock);
5793448Sdh155122 	}
5803448Sdh155122 	kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size *
5813448Sdh155122 	    sizeof (connf_t));
5823448Sdh155122 	ipst->ips_ipcl_udp_fanout = NULL;
5830Sstevel@tonic-gate 
5843448Sdh155122 	for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
5853448Sdh155122 		ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL);
5863448Sdh155122 		mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock);
5873448Sdh155122 	}
5883448Sdh155122 	kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size *
5893448Sdh155122 	    sizeof (connf_t));
5903448Sdh155122 	ipst->ips_ipcl_raw_fanout = NULL;
5910Sstevel@tonic-gate 
5923448Sdh155122 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
5933448Sdh155122 		ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL);
5943448Sdh155122 		mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
5953448Sdh155122 	}
5963448Sdh155122 	kmem_free(ipst->ips_ipcl_globalhash_fanout,
5973448Sdh155122 	    sizeof (connf_t) * CONN_G_HASH_SIZE);
5983448Sdh155122 	ipst->ips_ipcl_globalhash_fanout = NULL;
5990Sstevel@tonic-gate 
6003448Sdh155122 	ASSERT(ipst->ips_rts_clients->connf_head == NULL);
6013448Sdh155122 	mutex_destroy(&ipst->ips_rts_clients->connf_lock);
6023448Sdh155122 	kmem_free(ipst->ips_rts_clients, sizeof (connf_t));
6033448Sdh155122 	ipst->ips_rts_clients = NULL;
6040Sstevel@tonic-gate }
6050Sstevel@tonic-gate 
6060Sstevel@tonic-gate /*
6070Sstevel@tonic-gate  * conn creation routine. initialize the conn, sets the reference
6080Sstevel@tonic-gate  * and inserts it in the global hash table.
6090Sstevel@tonic-gate  */
6100Sstevel@tonic-gate conn_t *
6113448Sdh155122 ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns)
6120Sstevel@tonic-gate {
6130Sstevel@tonic-gate 	conn_t	*connp;
6143448Sdh155122 	sctp_stack_t *sctps;
6155240Snordmark 	struct kmem_cache *conn_cache;
6160Sstevel@tonic-gate 
6170Sstevel@tonic-gate 	switch (type) {
6180Sstevel@tonic-gate 	case IPCL_SCTPCONN:
6190Sstevel@tonic-gate 		if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL)
6200Sstevel@tonic-gate 			return (NULL);
6214691Skcpoon 		sctp_conn_init(connp);
6223448Sdh155122 		sctps = ns->netstack_sctp;
6233448Sdh155122 		SCTP_G_Q_REFHOLD(sctps);
6243448Sdh155122 		netstack_hold(ns);
6253448Sdh155122 		connp->conn_netstack = ns;
6265240Snordmark 		return (connp);
6275240Snordmark 
6285240Snordmark 	case IPCL_TCPCONN:
6295240Snordmark 		conn_cache = tcp_conn_cache;
6300Sstevel@tonic-gate 		break;
6315240Snordmark 
6325240Snordmark 	case IPCL_UDPCONN:
6335240Snordmark 		conn_cache = udp_conn_cache;
6345240Snordmark 		break;
6355240Snordmark 
6365240Snordmark 	case IPCL_RAWIPCONN:
6375240Snordmark 		conn_cache = rawip_conn_cache;
6385240Snordmark 		break;
6395240Snordmark 
6405240Snordmark 	case IPCL_RTSCONN:
6415240Snordmark 		conn_cache = rts_conn_cache;
6425240Snordmark 		break;
6435240Snordmark 
6440Sstevel@tonic-gate 	case IPCL_IPCCONN:
6455240Snordmark 		conn_cache = ip_conn_cache;
6460Sstevel@tonic-gate 		break;
6475240Snordmark 
648741Smasputra 	default:
649741Smasputra 		connp = NULL;
650741Smasputra 		ASSERT(0);
6510Sstevel@tonic-gate 	}
6520Sstevel@tonic-gate 
6535240Snordmark 	if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL)
6545240Snordmark 		return (NULL);
6555240Snordmark 
6565240Snordmark 	connp->conn_ref = 1;
6575240Snordmark 	netstack_hold(ns);
6585240Snordmark 	connp->conn_netstack = ns;
6595240Snordmark 	ipcl_globalhash_insert(connp);
6600Sstevel@tonic-gate 	return (connp);
6610Sstevel@tonic-gate }
6620Sstevel@tonic-gate 
6630Sstevel@tonic-gate void
6640Sstevel@tonic-gate ipcl_conn_destroy(conn_t *connp)
6650Sstevel@tonic-gate {
6660Sstevel@tonic-gate 	mblk_t	*mp;
6673448Sdh155122 	netstack_t	*ns = connp->conn_netstack;
6680Sstevel@tonic-gate 
6690Sstevel@tonic-gate 	ASSERT(!MUTEX_HELD(&connp->conn_lock));
6700Sstevel@tonic-gate 	ASSERT(connp->conn_ref == 0);
6710Sstevel@tonic-gate 	ASSERT(connp->conn_ire_cache == NULL);
6720Sstevel@tonic-gate 
6737502Saruna@cs.umn.edu 	DTRACE_PROBE1(conn__destroy, conn_t *, connp);
6747502Saruna@cs.umn.edu 
6751676Sjpk 	if (connp->conn_peercred != NULL &&
6761676Sjpk 	    connp->conn_peercred != connp->conn_cred)
6771676Sjpk 		crfree(connp->conn_peercred);
6781676Sjpk 	connp->conn_peercred = NULL;
6791676Sjpk 
6801676Sjpk 	if (connp->conn_cred != NULL) {
6811676Sjpk 		crfree(connp->conn_cred);
6821676Sjpk 		connp->conn_cred = NULL;
6831676Sjpk 	}
6841676Sjpk 
6850Sstevel@tonic-gate 	ipcl_globalhash_remove(connp);
6860Sstevel@tonic-gate 
6875240Snordmark 	/* FIXME: add separate tcp_conn_free()? */
6880Sstevel@tonic-gate 	if (connp->conn_flags & IPCL_TCPCONN) {
689741Smasputra 		tcp_t	*tcp = connp->conn_tcp;
6903448Sdh155122 		tcp_stack_t *tcps;
6913448Sdh155122 
6923448Sdh155122 		ASSERT(tcp != NULL);
6933448Sdh155122 		tcps = tcp->tcp_tcps;
6943448Sdh155122 		if (tcps != NULL) {
6953448Sdh155122 			if (connp->conn_latch != NULL) {
6963448Sdh155122 				IPLATCH_REFRELE(connp->conn_latch, ns);
6973448Sdh155122 				connp->conn_latch = NULL;
6983448Sdh155122 			}
6993448Sdh155122 			if (connp->conn_policy != NULL) {
7003448Sdh155122 				IPPH_REFRELE(connp->conn_policy, ns);
7013448Sdh155122 				connp->conn_policy = NULL;
7023448Sdh155122 			}
7033448Sdh155122 			tcp->tcp_tcps = NULL;
7043448Sdh155122 			TCPS_REFRELE(tcps);
7053448Sdh155122 		}
706741Smasputra 
7070Sstevel@tonic-gate 		tcp_free(tcp);
7080Sstevel@tonic-gate 		mp = tcp->tcp_timercache;
7091676Sjpk 		tcp->tcp_cred = NULL;
7100Sstevel@tonic-gate 
7110Sstevel@tonic-gate 		if (tcp->tcp_sack_info != NULL) {
7120Sstevel@tonic-gate 			bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t));
7130Sstevel@tonic-gate 			kmem_cache_free(tcp_sack_info_cache,
7140Sstevel@tonic-gate 			    tcp->tcp_sack_info);
7150Sstevel@tonic-gate 		}
7160Sstevel@tonic-gate 		if (tcp->tcp_iphc != NULL) {
7170Sstevel@tonic-gate 			if (tcp->tcp_hdr_grown) {
7180Sstevel@tonic-gate 				kmem_free(tcp->tcp_iphc, tcp->tcp_iphc_len);
7190Sstevel@tonic-gate 			} else {
7200Sstevel@tonic-gate 				bzero(tcp->tcp_iphc, tcp->tcp_iphc_len);
7210Sstevel@tonic-gate 				kmem_cache_free(tcp_iphc_cache, tcp->tcp_iphc);
7220Sstevel@tonic-gate 			}
7230Sstevel@tonic-gate 			tcp->tcp_iphc_len = 0;
7240Sstevel@tonic-gate 		}
7250Sstevel@tonic-gate 		ASSERT(tcp->tcp_iphc_len == 0);
7260Sstevel@tonic-gate 
7278014SKacheong.Poon@Sun.COM 		/*
7288014SKacheong.Poon@Sun.COM 		 * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate
7298014SKacheong.Poon@Sun.COM 		 * the mblk.
7308014SKacheong.Poon@Sun.COM 		 */
7318014SKacheong.Poon@Sun.COM 		if (tcp->tcp_rsrv_mp != NULL) {
7328014SKacheong.Poon@Sun.COM 			freeb(tcp->tcp_rsrv_mp);
7338014SKacheong.Poon@Sun.COM 			tcp->tcp_rsrv_mp = NULL;
7348014SKacheong.Poon@Sun.COM 			mutex_destroy(&tcp->tcp_rsrv_mp_lock);
7358014SKacheong.Poon@Sun.COM 		}
7368014SKacheong.Poon@Sun.COM 
7373448Sdh155122 		ASSERT(connp->conn_latch == NULL);
7383448Sdh155122 		ASSERT(connp->conn_policy == NULL);
7393448Sdh155122 
7403448Sdh155122 		if (ns != NULL) {
7413448Sdh155122 			ASSERT(tcp->tcp_tcps == NULL);
7423448Sdh155122 			connp->conn_netstack = NULL;
7433448Sdh155122 			netstack_rele(ns);
7443448Sdh155122 		}
7455240Snordmark 
7465240Snordmark 		ipcl_conn_cleanup(connp);
7475240Snordmark 		connp->conn_flags = IPCL_TCPCONN;
7485240Snordmark 		bzero(tcp, sizeof (tcp_t));
7495240Snordmark 
7505240Snordmark 		tcp->tcp_timercache = mp;
7515240Snordmark 		tcp->tcp_connp = connp;
7525240Snordmark 		kmem_cache_free(tcp_conn_cache, connp);
7535240Snordmark 		return;
7545240Snordmark 	}
7555240Snordmark 	if (connp->conn_latch != NULL) {
7565240Snordmark 		IPLATCH_REFRELE(connp->conn_latch, connp->conn_netstack);
7575240Snordmark 		connp->conn_latch = NULL;
7585240Snordmark 	}
7595240Snordmark 	if (connp->conn_policy != NULL) {
7605240Snordmark 		IPPH_REFRELE(connp->conn_policy, connp->conn_netstack);
7615240Snordmark 		connp->conn_policy = NULL;
7625240Snordmark 	}
7635240Snordmark 	if (connp->conn_ipsec_opt_mp != NULL) {
7645240Snordmark 		freemsg(connp->conn_ipsec_opt_mp);
7655240Snordmark 		connp->conn_ipsec_opt_mp = NULL;
7665240Snordmark 	}
7675240Snordmark 
7685240Snordmark 	if (connp->conn_flags & IPCL_SCTPCONN) {
7693448Sdh155122 		ASSERT(ns != NULL);
7700Sstevel@tonic-gate 		sctp_free(connp);
7715240Snordmark 		return;
7725240Snordmark 	}
7735240Snordmark 
7745240Snordmark 	if (ns != NULL) {
7755240Snordmark 		connp->conn_netstack = NULL;
7765240Snordmark 		netstack_rele(ns);
7775240Snordmark 	}
7788348SEric.Yu@Sun.COM 
7795240Snordmark 	ipcl_conn_cleanup(connp);
7805240Snordmark 
7815240Snordmark 	/* leave conn_priv aka conn_udp, conn_icmp, etc in place. */
7825240Snordmark 	if (connp->conn_flags & IPCL_UDPCONN) {
7835240Snordmark 		connp->conn_flags = IPCL_UDPCONN;
7845240Snordmark 		kmem_cache_free(udp_conn_cache, connp);
7855240Snordmark 	} else if (connp->conn_flags & IPCL_RAWIPCONN) {
7868348SEric.Yu@Sun.COM 
7875240Snordmark 		connp->conn_flags = IPCL_RAWIPCONN;
7885240Snordmark 		connp->conn_ulp = IPPROTO_ICMP;
7895240Snordmark 		kmem_cache_free(rawip_conn_cache, connp);
7905240Snordmark 	} else if (connp->conn_flags & IPCL_RTSCONN) {
7915240Snordmark 		connp->conn_flags = IPCL_RTSCONN;
7925240Snordmark 		kmem_cache_free(rts_conn_cache, connp);
7930Sstevel@tonic-gate 	} else {
7945240Snordmark 		connp->conn_flags = IPCL_IPCCONN;
7955240Snordmark 		ASSERT(connp->conn_flags & IPCL_IPCCONN);
7965240Snordmark 		ASSERT(connp->conn_priv == NULL);
7975240Snordmark 		kmem_cache_free(ip_conn_cache, connp);
7980Sstevel@tonic-gate 	}
7990Sstevel@tonic-gate }
8000Sstevel@tonic-gate 
8010Sstevel@tonic-gate /*
8020Sstevel@tonic-gate  * Running in cluster mode - deregister listener information
8030Sstevel@tonic-gate  */
8040Sstevel@tonic-gate 
8050Sstevel@tonic-gate static void
8060Sstevel@tonic-gate ipcl_conn_unlisten(conn_t *connp)
8070Sstevel@tonic-gate {
8080Sstevel@tonic-gate 	ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0);
8090Sstevel@tonic-gate 	ASSERT(connp->conn_lport != 0);
8100Sstevel@tonic-gate 
8110Sstevel@tonic-gate 	if (cl_inet_unlisten != NULL) {
8120Sstevel@tonic-gate 		sa_family_t	addr_family;
8130Sstevel@tonic-gate 		uint8_t		*laddrp;
8140Sstevel@tonic-gate 
8150Sstevel@tonic-gate 		if (connp->conn_pkt_isv6) {
8160Sstevel@tonic-gate 			addr_family = AF_INET6;
8170Sstevel@tonic-gate 			laddrp = (uint8_t *)&connp->conn_bound_source_v6;
8180Sstevel@tonic-gate 		} else {
8190Sstevel@tonic-gate 			addr_family = AF_INET;
8200Sstevel@tonic-gate 			laddrp = (uint8_t *)&connp->conn_bound_source;
8210Sstevel@tonic-gate 		}
8228392SHuafeng.Lv@Sun.COM 		(*cl_inet_unlisten)(connp->conn_netstack->netstack_stackid,
8238392SHuafeng.Lv@Sun.COM 		    IPPROTO_TCP, addr_family, laddrp, connp->conn_lport, NULL);
8240Sstevel@tonic-gate 	}
8250Sstevel@tonic-gate 	connp->conn_flags &= ~IPCL_CL_LISTENER;
8260Sstevel@tonic-gate }
8270Sstevel@tonic-gate 
/*
 * Unlink connp from the fanout bucket recorded in conn_fanout, if any.
 * The bucket lock is taken for the unlink; conn_lock must not be held by
 * the caller.  After the unlink the cluster listener registration (if
 * present) is dropped and the reference held by the hash list is released.
 *
 * We set the IPCL_REMOVED flag (instead of clearing the flag indicating
 * which table the conn belonged to). So for debugging we can see which hash
 * table this connection was in.
 */
#define	IPCL_HASH_REMOVE(connp)	{					\
	connf_t	*connfp = (connp)->conn_fanout;				\
	ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));			\
	if (connfp != NULL) {						\
		IPCL_DEBUG_LVL(4, ("IPCL_HASH_REMOVE: connp %p",	\
		    (void *)(connp)));					\
		mutex_enter(&connfp->connf_lock);			\
		if ((connp)->conn_next != NULL) {			\
			(connp)->conn_next->conn_prev =			\
			    (connp)->conn_prev;				\
		}							\
		if ((connp)->conn_prev != NULL) {			\
			(connp)->conn_prev->conn_next =			\
			    (connp)->conn_next;				\
		} else {						\
			connfp->connf_head = (connp)->conn_next;	\
		}							\
		(connp)->conn_fanout = NULL;				\
		(connp)->conn_next = NULL;				\
		(connp)->conn_prev = NULL;				\
		(connp)->conn_flags |= IPCL_REMOVED;			\
		if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0) {	\
			ipcl_conn_unlisten((connp));			\
		}							\
		CONN_DEC_REF((connp));					\
		mutex_exit(&connfp->connf_lock);			\
	}								\
}
8580Sstevel@tonic-gate 
8590Sstevel@tonic-gate void
8600Sstevel@tonic-gate ipcl_hash_remove(conn_t *connp)
8610Sstevel@tonic-gate {
8620Sstevel@tonic-gate 	IPCL_HASH_REMOVE(connp);
8630Sstevel@tonic-gate }
8640Sstevel@tonic-gate 
8650Sstevel@tonic-gate /*
8660Sstevel@tonic-gate  * The whole purpose of this function is allow removal of
8670Sstevel@tonic-gate  * a conn_t from the connected hash for timewait reclaim.
8680Sstevel@tonic-gate  * This is essentially a TW reclaim fastpath where timewait
8690Sstevel@tonic-gate  * collector checks under fanout lock (so no one else can
8700Sstevel@tonic-gate  * get access to the conn_t) that refcnt is 2 i.e. one for
8710Sstevel@tonic-gate  * TCP and one for the classifier hash list. If ref count
8720Sstevel@tonic-gate  * is indeed 2, we can just remove the conn under lock and
8730Sstevel@tonic-gate  * avoid cleaning up the conn under squeue. This gives us
8740Sstevel@tonic-gate  * improved performance.
8750Sstevel@tonic-gate  */
8760Sstevel@tonic-gate void
8770Sstevel@tonic-gate ipcl_hash_remove_locked(conn_t *connp, connf_t	*connfp)
8780Sstevel@tonic-gate {
8790Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connfp->connf_lock));
8800Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connp->conn_lock));
8810Sstevel@tonic-gate 	ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0);
8820Sstevel@tonic-gate 
8830Sstevel@tonic-gate 	if ((connp)->conn_next != NULL) {
8844691Skcpoon 		(connp)->conn_next->conn_prev = (connp)->conn_prev;
8850Sstevel@tonic-gate 	}
8860Sstevel@tonic-gate 	if ((connp)->conn_prev != NULL) {
8874691Skcpoon 		(connp)->conn_prev->conn_next = (connp)->conn_next;
8880Sstevel@tonic-gate 	} else {
8890Sstevel@tonic-gate 		connfp->connf_head = (connp)->conn_next;
8900Sstevel@tonic-gate 	}
8910Sstevel@tonic-gate 	(connp)->conn_fanout = NULL;
8920Sstevel@tonic-gate 	(connp)->conn_next = NULL;
8930Sstevel@tonic-gate 	(connp)->conn_prev = NULL;
8940Sstevel@tonic-gate 	(connp)->conn_flags |= IPCL_REMOVED;
8950Sstevel@tonic-gate 	ASSERT((connp)->conn_ref == 2);
8960Sstevel@tonic-gate 	(connp)->conn_ref--;
8970Sstevel@tonic-gate }
8980Sstevel@tonic-gate 
/*
 * Push connp onto the head of bucket connfp and mark it IPCL_CONNECTED
 * (clearing IPCL_REMOVED).  The conn must not currently be on any fanout
 * list.  A reference is taken on behalf of the hash list.  This macro does
 * not take connf_lock itself; see IPCL_HASH_INSERT_CONNECTED for the
 * variant that does.
 */
#define	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) {		\
	ASSERT((connp)->conn_fanout == NULL);				\
	ASSERT((connp)->conn_next == NULL);				\
	ASSERT((connp)->conn_prev == NULL);				\
	if ((connfp)->connf_head != NULL) {				\
		(connfp)->connf_head->conn_prev = (connp);		\
		(connp)->conn_next = (connfp)->connf_head;		\
	}								\
	(connp)->conn_fanout = (connfp);				\
	(connfp)->connf_head = (connp);					\
	(connp)->conn_flags =						\
	    ((connp)->conn_flags & ~IPCL_REMOVED) | IPCL_CONNECTED;	\
	CONN_INC_REF(connp);						\
}
9130Sstevel@tonic-gate 
/*
 * Move connp to the head of bucket connfp as a connected entry: first
 * unlink it from any bucket it is on (IPCL_HASH_REMOVE), then insert it
 * under the new bucket's connf_lock.
 */
#define	IPCL_HASH_INSERT_CONNECTED(connfp, connp) {			\
	IPCL_DEBUG_LVL(8, ("IPCL_HASH_INSERT_CONNECTED: connfp %p "	\
	    "connp %p", (void *)(connfp), (void *)(connp)));		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);		\
	mutex_exit(&(connfp)->connf_lock);				\
}
9220Sstevel@tonic-gate 
/*
 * Move connp into bucket connfp as a bound entry.
 *
 * The conn is first unlinked from any bucket it is on.  Then, under the
 * new bucket's connf_lock, the list is walked past every entry for which
 * _IPCL_V4_MATCH_ANY(conn_srcv6) is false and connp is linked in just
 * before the first entry for which it is true (or at the tail if there is
 * none).  IPCL_REMOVED is cleared, IPCL_BOUND is set and a reference is
 * taken on behalf of the hash list.
 *
 * All uses of the macro parameters are parenthesized so that expression
 * arguments expand safely.
 */
#define	IPCL_HASH_INSERT_BOUND(connfp, connp) {				\
	conn_t *pconnp = NULL, *nconnp;					\
	IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_BOUND: connfp %p "	\
	    "connp %p", (void *)(connfp), (void *)(connp)));		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	nconnp = (connfp)->connf_head;					\
	while (nconnp != NULL &&					\
	    !_IPCL_V4_MATCH_ANY(nconnp->conn_srcv6)) {			\
		pconnp = nconnp;					\
		nconnp = nconnp->conn_next;				\
	}								\
	if (pconnp != NULL) {						\
		pconnp->conn_next = (connp);				\
		(connp)->conn_prev = pconnp;				\
	} else {							\
		(connfp)->connf_head = (connp);				\
	}								\
	if (nconnp != NULL) {						\
		(connp)->conn_next = nconnp;				\
		nconnp->conn_prev = (connp);				\
	}								\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF((connp));						\
	mutex_exit(&(connfp)->connf_lock);				\
}
9510Sstevel@tonic-gate 
/*
 * Move connp into bucket connfp as a wildcard entry.
 *
 * The conn is first unlinked from any bucket it is on.  Then, under the
 * new bucket's connf_lock:
 *  - if connp's source address is v4-mapped, it is linked in just before
 *    the first entry in the same zone whose source address is the
 *    unspecified (v6) address;
 *  - otherwise, or if no such entry exists, it is appended at the tail.
 * IPCL_REMOVED is cleared, IPCL_BOUND is set and a reference is taken on
 * behalf of the hash list.
 *
 * All uses of the macro parameters are parenthesized so that expression
 * arguments expand safely.
 */
#define	IPCL_HASH_INSERT_WILDCARD(connfp, connp) {			\
	conn_t **list, *prev, *next;					\
	boolean_t isv4mapped =						\
	    IN6_IS_ADDR_V4MAPPED(&(connp)->conn_srcv6);			\
	IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_WILDCARD: connfp %p "	\
	    "connp %p", (void *)(connfp), (void *)(connp)));		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	list = &(connfp)->connf_head;					\
	prev = NULL;							\
	while ((next = *list) != NULL) {				\
		if (isv4mapped &&					\
		    IN6_IS_ADDR_UNSPECIFIED(&next->conn_srcv6) &&	\
		    (connp)->conn_zoneid == next->conn_zoneid) {	\
			(connp)->conn_next = next;			\
			if (prev != NULL)				\
				prev = next->conn_prev;			\
			next->conn_prev = (connp);			\
			break;						\
		}							\
		list = &next->conn_next;				\
		prev = next;						\
	}								\
	(connp)->conn_prev = prev;					\
	*list = (connp);						\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF((connp));						\
	mutex_exit(&(connfp)->connf_lock);				\
}
9830Sstevel@tonic-gate 
9840Sstevel@tonic-gate void
9850Sstevel@tonic-gate ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp)
9860Sstevel@tonic-gate {
9870Sstevel@tonic-gate 	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
9880Sstevel@tonic-gate }
9890Sstevel@tonic-gate 
9900Sstevel@tonic-gate void
9910Sstevel@tonic-gate ipcl_proto_insert(conn_t *connp, uint8_t protocol)
9920Sstevel@tonic-gate {
9930Sstevel@tonic-gate 	connf_t	*connfp;
9943448Sdh155122 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
9950Sstevel@tonic-gate 
9960Sstevel@tonic-gate 	ASSERT(connp != NULL);
9971676Sjpk 	ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH ||
9981676Sjpk 	    protocol == IPPROTO_ESP);
9990Sstevel@tonic-gate 
10000Sstevel@tonic-gate 	connp->conn_ulp = protocol;
10010Sstevel@tonic-gate 
10020Sstevel@tonic-gate 	/* Insert it in the protocol hash */
10033448Sdh155122 	connfp = &ipst->ips_ipcl_proto_fanout[protocol];
10040Sstevel@tonic-gate 	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
10050Sstevel@tonic-gate }
10060Sstevel@tonic-gate 
10070Sstevel@tonic-gate void
10080Sstevel@tonic-gate ipcl_proto_insert_v6(conn_t *connp, uint8_t protocol)
10090Sstevel@tonic-gate {
10100Sstevel@tonic-gate 	connf_t	*connfp;
10113448Sdh155122 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
10120Sstevel@tonic-gate 
10130Sstevel@tonic-gate 	ASSERT(connp != NULL);
10141676Sjpk 	ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH ||
10151676Sjpk 	    protocol == IPPROTO_ESP);
10160Sstevel@tonic-gate 
10170Sstevel@tonic-gate 	connp->conn_ulp = protocol;
10180Sstevel@tonic-gate 
10190Sstevel@tonic-gate 	/* Insert it in the Bind Hash */
10203448Sdh155122 	connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
10210Sstevel@tonic-gate 	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
10220Sstevel@tonic-gate }
10230Sstevel@tonic-gate 
10240Sstevel@tonic-gate /*
10250Sstevel@tonic-gate  * This function is used only for inserting SCTP raw socket now.
10260Sstevel@tonic-gate  * This may change later.
10270Sstevel@tonic-gate  *
10280Sstevel@tonic-gate  * Note that only one raw socket can be bound to a port.  The param
10290Sstevel@tonic-gate  * lport is in network byte order.
10300Sstevel@tonic-gate  */
10310Sstevel@tonic-gate static int
10320Sstevel@tonic-gate ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport)
10330Sstevel@tonic-gate {
10340Sstevel@tonic-gate 	connf_t	*connfp;
10350Sstevel@tonic-gate 	conn_t	*oconnp;
10363448Sdh155122 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
10370Sstevel@tonic-gate 
10383448Sdh155122 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
10390Sstevel@tonic-gate 
10400Sstevel@tonic-gate 	/* Check for existing raw socket already bound to the port. */
10410Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
10420Sstevel@tonic-gate 	for (oconnp = connfp->connf_head; oconnp != NULL;
1043409Skcpoon 	    oconnp = oconnp->conn_next) {
10440Sstevel@tonic-gate 		if (oconnp->conn_lport == lport &&
10450Sstevel@tonic-gate 		    oconnp->conn_zoneid == connp->conn_zoneid &&
10460Sstevel@tonic-gate 		    oconnp->conn_af_isv6 == connp->conn_af_isv6 &&
10470Sstevel@tonic-gate 		    ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) ||
10480Sstevel@tonic-gate 		    IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_srcv6) ||
10490Sstevel@tonic-gate 		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6) ||
10500Sstevel@tonic-gate 		    IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_srcv6)) ||
10510Sstevel@tonic-gate 		    IN6_ARE_ADDR_EQUAL(&oconnp->conn_srcv6,
10520Sstevel@tonic-gate 		    &connp->conn_srcv6))) {
10530Sstevel@tonic-gate 			break;
10540Sstevel@tonic-gate 		}
10550Sstevel@tonic-gate 	}
10560Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
10570Sstevel@tonic-gate 	if (oconnp != NULL)
10580Sstevel@tonic-gate 		return (EADDRNOTAVAIL);
10590Sstevel@tonic-gate 
10600Sstevel@tonic-gate 	if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) ||
10610Sstevel@tonic-gate 	    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_remv6)) {
10620Sstevel@tonic-gate 		if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) ||
10630Sstevel@tonic-gate 		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6)) {
10640Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
10650Sstevel@tonic-gate 		} else {
10660Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
10670Sstevel@tonic-gate 		}
10680Sstevel@tonic-gate 	} else {
10690Sstevel@tonic-gate 		IPCL_HASH_INSERT_CONNECTED(connfp, connp);
10700Sstevel@tonic-gate 	}
10710Sstevel@tonic-gate 	return (0);
10720Sstevel@tonic-gate }
10730Sstevel@tonic-gate 
10740Sstevel@tonic-gate /*
10751676Sjpk  * Check for a MAC exemption conflict on a labeled system.  Note that for
10761676Sjpk  * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the
10771676Sjpk  * transport layer.  This check is for binding all other protocols.
10781676Sjpk  *
10791676Sjpk  * Returns true if there's a conflict.
10801676Sjpk  */
10811676Sjpk static boolean_t
10823448Sdh155122 check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst)
10831676Sjpk {
10841676Sjpk 	connf_t	*connfp;
10851676Sjpk 	conn_t *tconn;
10861676Sjpk 
10873448Sdh155122 	connfp = &ipst->ips_ipcl_proto_fanout[connp->conn_ulp];
10881676Sjpk 	mutex_enter(&connfp->connf_lock);
10891676Sjpk 	for (tconn = connfp->connf_head; tconn != NULL;
10901676Sjpk 	    tconn = tconn->conn_next) {
10911676Sjpk 		/* We don't allow v4 fallback for v6 raw socket */
10921676Sjpk 		if (connp->conn_af_isv6 != tconn->conn_af_isv6)
10931676Sjpk 			continue;
10941676Sjpk 		/* If neither is exempt, then there's no conflict */
10951676Sjpk 		if (!connp->conn_mac_exempt && !tconn->conn_mac_exempt)
10961676Sjpk 			continue;
10971676Sjpk 		/* If both are bound to different specific addrs, ok */
10981676Sjpk 		if (connp->conn_src != INADDR_ANY &&
10991676Sjpk 		    tconn->conn_src != INADDR_ANY &&
11001676Sjpk 		    connp->conn_src != tconn->conn_src)
11011676Sjpk 			continue;
11021676Sjpk 		/* These two conflict; fail */
11031676Sjpk 		break;
11041676Sjpk 	}
11051676Sjpk 	mutex_exit(&connfp->connf_lock);
11061676Sjpk 	return (tconn != NULL);
11071676Sjpk }
11081676Sjpk 
11091676Sjpk static boolean_t
11103448Sdh155122 check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst)
11111676Sjpk {
11121676Sjpk 	connf_t	*connfp;
11131676Sjpk 	conn_t *tconn;
11141676Sjpk 
11153448Sdh155122 	connfp = &ipst->ips_ipcl_proto_fanout[connp->conn_ulp];
11161676Sjpk 	mutex_enter(&connfp->connf_lock);
11171676Sjpk 	for (tconn = connfp->connf_head; tconn != NULL;
11181676Sjpk 	    tconn = tconn->conn_next) {
11191676Sjpk 		/* We don't allow v4 fallback for v6 raw socket */
11201676Sjpk 		if (connp->conn_af_isv6 != tconn->conn_af_isv6)
11211676Sjpk 			continue;
11221676Sjpk 		/* If neither is exempt, then there's no conflict */
11231676Sjpk 		if (!connp->conn_mac_exempt && !tconn->conn_mac_exempt)
11241676Sjpk 			continue;
11251676Sjpk 		/* If both are bound to different addrs, ok */
11261676Sjpk 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) &&
11271676Sjpk 		    !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_srcv6) &&
11281676Sjpk 		    !IN6_ARE_ADDR_EQUAL(&connp->conn_srcv6, &tconn->conn_srcv6))
11291676Sjpk 			continue;
11301676Sjpk 		/* These two conflict; fail */
11311676Sjpk 		break;
11321676Sjpk 	}
11331676Sjpk 	mutex_exit(&connfp->connf_lock);
11341676Sjpk 	return (tconn != NULL);
11351676Sjpk }
11361676Sjpk 
11371676Sjpk /*
11380Sstevel@tonic-gate  * (v4, v6) bind hash insertion routines
11390Sstevel@tonic-gate  */
11400Sstevel@tonic-gate int
11410Sstevel@tonic-gate ipcl_bind_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, uint16_t lport)
11420Sstevel@tonic-gate {
11430Sstevel@tonic-gate 	connf_t	*connfp;
11440Sstevel@tonic-gate #ifdef	IPCL_DEBUG
11450Sstevel@tonic-gate 	char	buf[INET_NTOA_BUFSIZE];
11460Sstevel@tonic-gate #endif
11470Sstevel@tonic-gate 	int	ret = 0;
11483448Sdh155122 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
11490Sstevel@tonic-gate 
11500Sstevel@tonic-gate 	ASSERT(connp);
11510Sstevel@tonic-gate 
11520Sstevel@tonic-gate 	IPCL_DEBUG_LVL(64, ("ipcl_bind_insert: connp %p, src = %s, "
11530Sstevel@tonic-gate 	    "port = %d\n", (void *)connp, inet_ntoa_r(src, buf), lport));
11540Sstevel@tonic-gate 
11550Sstevel@tonic-gate 	connp->conn_ulp = protocol;
11560Sstevel@tonic-gate 	IN6_IPADDR_TO_V4MAPPED(src, &connp->conn_srcv6);
11570Sstevel@tonic-gate 	connp->conn_lport = lport;
11580Sstevel@tonic-gate 
11590Sstevel@tonic-gate 	switch (protocol) {
11601676Sjpk 	default:
11613448Sdh155122 		if (is_system_labeled() &&
11623448Sdh155122 		    check_exempt_conflict_v4(connp, ipst))
11631676Sjpk 			return (EADDRINUSE);
11641676Sjpk 		/* FALLTHROUGH */
11650Sstevel@tonic-gate 	case IPPROTO_UDP:
11660Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
11670Sstevel@tonic-gate 			IPCL_DEBUG_LVL(64,
11680Sstevel@tonic-gate 			    ("ipcl_bind_insert: connp %p - udp\n",
11690Sstevel@tonic-gate 			    (void *)connp));
11703448Sdh155122 			connfp = &ipst->ips_ipcl_udp_fanout[
11713448Sdh155122 			    IPCL_UDP_HASH(lport, ipst)];
11720Sstevel@tonic-gate 		} else {
11730Sstevel@tonic-gate 			IPCL_DEBUG_LVL(64,
11740Sstevel@tonic-gate 			    ("ipcl_bind_insert: connp %p - protocol\n",
11750Sstevel@tonic-gate 			    (void *)connp));
11763448Sdh155122 			connfp = &ipst->ips_ipcl_proto_fanout[protocol];
11770Sstevel@tonic-gate 		}
11780Sstevel@tonic-gate 
11790Sstevel@tonic-gate 		if (connp->conn_rem != INADDR_ANY) {
11800Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
11810Sstevel@tonic-gate 		} else if (connp->conn_src != INADDR_ANY) {
11820Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
11830Sstevel@tonic-gate 		} else {
11840Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
11850Sstevel@tonic-gate 		}
11860Sstevel@tonic-gate 		break;
11870Sstevel@tonic-gate 
11880Sstevel@tonic-gate 	case IPPROTO_TCP:
11890Sstevel@tonic-gate 
11900Sstevel@tonic-gate 		/* Insert it in the Bind Hash */
11911676Sjpk 		ASSERT(connp->conn_zoneid != ALL_ZONES);
11923448Sdh155122 		connfp = &ipst->ips_ipcl_bind_fanout[
11933448Sdh155122 		    IPCL_BIND_HASH(lport, ipst)];
11940Sstevel@tonic-gate 		if (connp->conn_src != INADDR_ANY) {
11950Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
11960Sstevel@tonic-gate 		} else {
11970Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
11980Sstevel@tonic-gate 		}
11990Sstevel@tonic-gate 		if (cl_inet_listen != NULL) {
12000Sstevel@tonic-gate 			ASSERT(!connp->conn_pkt_isv6);
12010Sstevel@tonic-gate 			connp->conn_flags |= IPCL_CL_LISTENER;
12028392SHuafeng.Lv@Sun.COM 			(*cl_inet_listen)(
12038392SHuafeng.Lv@Sun.COM 			    connp->conn_netstack->netstack_stackid,
12048392SHuafeng.Lv@Sun.COM 			    IPPROTO_TCP, AF_INET,
12058392SHuafeng.Lv@Sun.COM 			    (uint8_t *)&connp->conn_bound_source, lport, NULL);
12060Sstevel@tonic-gate 		}
12070Sstevel@tonic-gate 		break;
12080Sstevel@tonic-gate 
12090Sstevel@tonic-gate 	case IPPROTO_SCTP:
12100Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
12110Sstevel@tonic-gate 		break;
12120Sstevel@tonic-gate 	}
12130Sstevel@tonic-gate 
12140Sstevel@tonic-gate 	return (ret);
12150Sstevel@tonic-gate }
12160Sstevel@tonic-gate 
12170Sstevel@tonic-gate int
12180Sstevel@tonic-gate ipcl_bind_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src,
12190Sstevel@tonic-gate     uint16_t lport)
12200Sstevel@tonic-gate {
12210Sstevel@tonic-gate 	connf_t	*connfp;
12220Sstevel@tonic-gate 	int	ret = 0;
12233448Sdh155122 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
12240Sstevel@tonic-gate 
12250Sstevel@tonic-gate 	ASSERT(connp);
12260Sstevel@tonic-gate 
12270Sstevel@tonic-gate 	connp->conn_ulp = protocol;
12280Sstevel@tonic-gate 	connp->conn_srcv6 = *src;
12290Sstevel@tonic-gate 	connp->conn_lport = lport;
12300Sstevel@tonic-gate 
12310Sstevel@tonic-gate 	switch (protocol) {
12321676Sjpk 	default:
12333448Sdh155122 		if (is_system_labeled() &&
12343448Sdh155122 		    check_exempt_conflict_v6(connp, ipst))
12351676Sjpk 			return (EADDRINUSE);
12361676Sjpk 		/* FALLTHROUGH */
12370Sstevel@tonic-gate 	case IPPROTO_UDP:
12380Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
12390Sstevel@tonic-gate 			IPCL_DEBUG_LVL(128,
12400Sstevel@tonic-gate 			    ("ipcl_bind_insert_v6: connp %p - udp\n",
12410Sstevel@tonic-gate 			    (void *)connp));
12423448Sdh155122 			connfp = &ipst->ips_ipcl_udp_fanout[
12433448Sdh155122 			    IPCL_UDP_HASH(lport, ipst)];
12440Sstevel@tonic-gate 		} else {
12450Sstevel@tonic-gate 			IPCL_DEBUG_LVL(128,
12460Sstevel@tonic-gate 			    ("ipcl_bind_insert_v6: connp %p - protocol\n",
12470Sstevel@tonic-gate 			    (void *)connp));
12483448Sdh155122 			connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
12490Sstevel@tonic-gate 		}
12500Sstevel@tonic-gate 
12510Sstevel@tonic-gate 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) {
12520Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
12530Sstevel@tonic-gate 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
12540Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
12550Sstevel@tonic-gate 		} else {
12560Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
12570Sstevel@tonic-gate 		}
12580Sstevel@tonic-gate 		break;
12590Sstevel@tonic-gate 
12600Sstevel@tonic-gate 	case IPPROTO_TCP:
12610Sstevel@tonic-gate 		/* XXX - Need a separate table for IN6_IS_ADDR_UNSPECIFIED? */
12620Sstevel@tonic-gate 
12630Sstevel@tonic-gate 		/* Insert it in the Bind Hash */
12641676Sjpk 		ASSERT(connp->conn_zoneid != ALL_ZONES);
12653448Sdh155122 		connfp = &ipst->ips_ipcl_bind_fanout[
12663448Sdh155122 		    IPCL_BIND_HASH(lport, ipst)];
12670Sstevel@tonic-gate 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
12680Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
12690Sstevel@tonic-gate 		} else {
12700Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
12710Sstevel@tonic-gate 		}
12720Sstevel@tonic-gate 		if (cl_inet_listen != NULL) {
12730Sstevel@tonic-gate 			sa_family_t	addr_family;
12740Sstevel@tonic-gate 			uint8_t		*laddrp;
12750Sstevel@tonic-gate 
12760Sstevel@tonic-gate 			if (connp->conn_pkt_isv6) {
12770Sstevel@tonic-gate 				addr_family = AF_INET6;
12780Sstevel@tonic-gate 				laddrp =
12790Sstevel@tonic-gate 				    (uint8_t *)&connp->conn_bound_source_v6;
12800Sstevel@tonic-gate 			} else {
12810Sstevel@tonic-gate 				addr_family = AF_INET;
12820Sstevel@tonic-gate 				laddrp = (uint8_t *)&connp->conn_bound_source;
12830Sstevel@tonic-gate 			}
12840Sstevel@tonic-gate 			connp->conn_flags |= IPCL_CL_LISTENER;
12858392SHuafeng.Lv@Sun.COM 			(*cl_inet_listen)(
12868392SHuafeng.Lv@Sun.COM 			    connp->conn_netstack->netstack_stackid,
12878392SHuafeng.Lv@Sun.COM 			    IPPROTO_TCP, addr_family, laddrp, lport, NULL);
12880Sstevel@tonic-gate 		}
12890Sstevel@tonic-gate 		break;
12900Sstevel@tonic-gate 
12910Sstevel@tonic-gate 	case IPPROTO_SCTP:
12920Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
12930Sstevel@tonic-gate 		break;
12940Sstevel@tonic-gate 	}
12950Sstevel@tonic-gate 
12960Sstevel@tonic-gate 	return (ret);
12970Sstevel@tonic-gate }
12980Sstevel@tonic-gate 
12990Sstevel@tonic-gate /*
13000Sstevel@tonic-gate  * ipcl_conn_hash insertion routines.
13010Sstevel@tonic-gate  */
13020Sstevel@tonic-gate int
13030Sstevel@tonic-gate ipcl_conn_insert(conn_t *connp, uint8_t protocol, ipaddr_t src,
13040Sstevel@tonic-gate     ipaddr_t rem, uint32_t ports)
13050Sstevel@tonic-gate {
13060Sstevel@tonic-gate 	connf_t		*connfp;
13070Sstevel@tonic-gate 	uint16_t	*up;
13080Sstevel@tonic-gate 	conn_t		*tconnp;
13090Sstevel@tonic-gate #ifdef	IPCL_DEBUG
13100Sstevel@tonic-gate 	char	sbuf[INET_NTOA_BUFSIZE], rbuf[INET_NTOA_BUFSIZE];
13110Sstevel@tonic-gate #endif
13120Sstevel@tonic-gate 	in_port_t	lport;
13130Sstevel@tonic-gate 	int		ret = 0;
13143448Sdh155122 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
13150Sstevel@tonic-gate 
13160Sstevel@tonic-gate 	IPCL_DEBUG_LVL(256, ("ipcl_conn_insert: connp %p, src = %s, "
13170Sstevel@tonic-gate 	    "dst = %s, ports = %x, protocol = %x", (void *)connp,
13180Sstevel@tonic-gate 	    inet_ntoa_r(src, sbuf), inet_ntoa_r(rem, rbuf),
13190Sstevel@tonic-gate 	    ports, protocol));
13200Sstevel@tonic-gate 
13210Sstevel@tonic-gate 	switch (protocol) {
13220Sstevel@tonic-gate 	case IPPROTO_TCP:
13230Sstevel@tonic-gate 		if (!(connp->conn_flags & IPCL_EAGER)) {
13240Sstevel@tonic-gate 			/*
13250Sstevel@tonic-gate 			 * for a eager connection, i.e connections which
13260Sstevel@tonic-gate 			 * have just been created, the initialization is
13270Sstevel@tonic-gate 			 * already done in ip at conn_creation time, so
13280Sstevel@tonic-gate 			 * we can skip the checks here.
13290Sstevel@tonic-gate 			 */
13300Sstevel@tonic-gate 			IPCL_CONN_INIT(connp, protocol, src, rem, ports);
13310Sstevel@tonic-gate 		}
13328432SJonathan.Anderson@Sun.COM 
13338432SJonathan.Anderson@Sun.COM 		/*
13348432SJonathan.Anderson@Sun.COM 		 * For tcp, we check whether the connection tuple already
13358432SJonathan.Anderson@Sun.COM 		 * exists before allowing the connection to proceed.  We
13368432SJonathan.Anderson@Sun.COM 		 * also allow indexing on the zoneid. This is to allow
13378432SJonathan.Anderson@Sun.COM 		 * multiple shared stack zones to have the same tcp
13388432SJonathan.Anderson@Sun.COM 		 * connection tuple. In practice this only happens for
13398432SJonathan.Anderson@Sun.COM 		 * INADDR_LOOPBACK as it's the only local address which
13408432SJonathan.Anderson@Sun.COM 		 * doesn't have to be unique.
13418432SJonathan.Anderson@Sun.COM 		 */
13423448Sdh155122 		connfp = &ipst->ips_ipcl_conn_fanout[
13433448Sdh155122 		    IPCL_CONN_HASH(connp->conn_rem,
13443448Sdh155122 		    connp->conn_ports, ipst)];
13450Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
13460Sstevel@tonic-gate 		for (tconnp = connfp->connf_head; tconnp != NULL;
13470Sstevel@tonic-gate 		    tconnp = tconnp->conn_next) {
13488432SJonathan.Anderson@Sun.COM 			if ((IPCL_CONN_MATCH(tconnp, connp->conn_ulp,
13490Sstevel@tonic-gate 			    connp->conn_rem, connp->conn_src,
13508432SJonathan.Anderson@Sun.COM 			    connp->conn_ports)) &&
13518432SJonathan.Anderson@Sun.COM 			    (IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid))) {
13520Sstevel@tonic-gate 
13530Sstevel@tonic-gate 				/* Already have a conn. bail out */
13540Sstevel@tonic-gate 				mutex_exit(&connfp->connf_lock);
13550Sstevel@tonic-gate 				return (EADDRINUSE);
13560Sstevel@tonic-gate 			}
13570Sstevel@tonic-gate 		}
13580Sstevel@tonic-gate 		if (connp->conn_fanout != NULL) {
13590Sstevel@tonic-gate 			/*
13600Sstevel@tonic-gate 			 * Probably a XTI/TLI application trying to do a
13610Sstevel@tonic-gate 			 * rebind. Let it happen.
13620Sstevel@tonic-gate 			 */
13630Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
13640Sstevel@tonic-gate 			IPCL_HASH_REMOVE(connp);
13650Sstevel@tonic-gate 			mutex_enter(&connfp->connf_lock);
13660Sstevel@tonic-gate 		}
13673104Sjprakash 
13683104Sjprakash 		ASSERT(connp->conn_recv != NULL);
13693104Sjprakash 
13700Sstevel@tonic-gate 		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
13710Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
13720Sstevel@tonic-gate 		break;
13730Sstevel@tonic-gate 
13740Sstevel@tonic-gate 	case IPPROTO_SCTP:
1375409Skcpoon 		/*
1376409Skcpoon 		 * The raw socket may have already been bound, remove it
1377409Skcpoon 		 * from the hash first.
1378409Skcpoon 		 */
1379409Skcpoon 		IPCL_HASH_REMOVE(connp);
1380409Skcpoon 		lport = htons((uint16_t)(ntohl(ports) & 0xFFFF));
13810Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
13820Sstevel@tonic-gate 		break;
13830Sstevel@tonic-gate 
13841676Sjpk 	default:
13851676Sjpk 		/*
13861676Sjpk 		 * Check for conflicts among MAC exempt bindings.  For
13871676Sjpk 		 * transports with port numbers, this is done by the upper
13881676Sjpk 		 * level per-transport binding logic.  For all others, it's
13891676Sjpk 		 * done here.
13901676Sjpk 		 */
13913448Sdh155122 		if (is_system_labeled() &&
13923448Sdh155122 		    check_exempt_conflict_v4(connp, ipst))
13931676Sjpk 			return (EADDRINUSE);
13941676Sjpk 		/* FALLTHROUGH */
13951676Sjpk 
13960Sstevel@tonic-gate 	case IPPROTO_UDP:
13970Sstevel@tonic-gate 		up = (uint16_t *)&ports;
13980Sstevel@tonic-gate 		IPCL_CONN_INIT(connp, protocol, src, rem, ports);
13990Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
14003448Sdh155122 			connfp = &ipst->ips_ipcl_udp_fanout[
14013448Sdh155122 			    IPCL_UDP_HASH(up[1], ipst)];
14020Sstevel@tonic-gate 		} else {
14033448Sdh155122 			connfp = &ipst->ips_ipcl_proto_fanout[protocol];
14040Sstevel@tonic-gate 		}
14050Sstevel@tonic-gate 
14060Sstevel@tonic-gate 		if (connp->conn_rem != INADDR_ANY) {
14070Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
14080Sstevel@tonic-gate 		} else if (connp->conn_src != INADDR_ANY) {
14090Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
14100Sstevel@tonic-gate 		} else {
14110Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
14120Sstevel@tonic-gate 		}
14130Sstevel@tonic-gate 		break;
14140Sstevel@tonic-gate 	}
14150Sstevel@tonic-gate 
14160Sstevel@tonic-gate 	return (ret);
14170Sstevel@tonic-gate }
14180Sstevel@tonic-gate 
14190Sstevel@tonic-gate int
14200Sstevel@tonic-gate ipcl_conn_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src,
14210Sstevel@tonic-gate     const in6_addr_t *rem, uint32_t ports, uint_t ifindex)
14220Sstevel@tonic-gate {
14230Sstevel@tonic-gate 	connf_t		*connfp;
14240Sstevel@tonic-gate 	uint16_t	*up;
14250Sstevel@tonic-gate 	conn_t		*tconnp;
14260Sstevel@tonic-gate 	in_port_t	lport;
14270Sstevel@tonic-gate 	int		ret = 0;
14283448Sdh155122 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
14290Sstevel@tonic-gate 
14300Sstevel@tonic-gate 	switch (protocol) {
14310Sstevel@tonic-gate 	case IPPROTO_TCP:
14320Sstevel@tonic-gate 		/* Just need to insert a conn struct */
14330Sstevel@tonic-gate 		if (!(connp->conn_flags & IPCL_EAGER)) {
14340Sstevel@tonic-gate 			IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports);
14350Sstevel@tonic-gate 		}
14368432SJonathan.Anderson@Sun.COM 
14378432SJonathan.Anderson@Sun.COM 		/*
14388432SJonathan.Anderson@Sun.COM 		 * For tcp, we check whether the connection tuple already
14398432SJonathan.Anderson@Sun.COM 		 * exists before allowing the connection to proceed.  We
14408432SJonathan.Anderson@Sun.COM 		 * also allow indexing on the zoneid. This is to allow
14418432SJonathan.Anderson@Sun.COM 		 * multiple shared stack zones to have the same tcp
14428432SJonathan.Anderson@Sun.COM 		 * connection tuple. In practice this only happens for
14438432SJonathan.Anderson@Sun.COM 		 * ipv6_loopback as it's the only local address which
14448432SJonathan.Anderson@Sun.COM 		 * doesn't have to be unique.
14458432SJonathan.Anderson@Sun.COM 		 */
14463448Sdh155122 		connfp = &ipst->ips_ipcl_conn_fanout[
14473448Sdh155122 		    IPCL_CONN_HASH_V6(connp->conn_remv6, connp->conn_ports,
14483448Sdh155122 		    ipst)];
14490Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
14500Sstevel@tonic-gate 		for (tconnp = connfp->connf_head; tconnp != NULL;
14510Sstevel@tonic-gate 		    tconnp = tconnp->conn_next) {
14520Sstevel@tonic-gate 			if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_ulp,
14530Sstevel@tonic-gate 			    connp->conn_remv6, connp->conn_srcv6,
14540Sstevel@tonic-gate 			    connp->conn_ports) &&
14550Sstevel@tonic-gate 			    (tconnp->conn_tcp->tcp_bound_if == 0 ||
14568432SJonathan.Anderson@Sun.COM 			    tconnp->conn_tcp->tcp_bound_if == ifindex) &&
14578432SJonathan.Anderson@Sun.COM 			    (IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid))) {
14580Sstevel@tonic-gate 				/* Already have a conn. bail out */
14590Sstevel@tonic-gate 				mutex_exit(&connfp->connf_lock);
14600Sstevel@tonic-gate 				return (EADDRINUSE);
14610Sstevel@tonic-gate 			}
14620Sstevel@tonic-gate 		}
14630Sstevel@tonic-gate 		if (connp->conn_fanout != NULL) {
14640Sstevel@tonic-gate 			/*
14650Sstevel@tonic-gate 			 * Probably a XTI/TLI application trying to do a
14660Sstevel@tonic-gate 			 * rebind. Let it happen.
14670Sstevel@tonic-gate 			 */
14680Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
14690Sstevel@tonic-gate 			IPCL_HASH_REMOVE(connp);
14700Sstevel@tonic-gate 			mutex_enter(&connfp->connf_lock);
14710Sstevel@tonic-gate 		}
14720Sstevel@tonic-gate 		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
14730Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
14740Sstevel@tonic-gate 		break;
14750Sstevel@tonic-gate 
14760Sstevel@tonic-gate 	case IPPROTO_SCTP:
1477409Skcpoon 		IPCL_HASH_REMOVE(connp);
1478409Skcpoon 		lport = htons((uint16_t)(ntohl(ports) & 0xFFFF));
14790Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
14800Sstevel@tonic-gate 		break;
14810Sstevel@tonic-gate 
14821676Sjpk 	default:
14833448Sdh155122 		if (is_system_labeled() &&
14843448Sdh155122 		    check_exempt_conflict_v6(connp, ipst))
14851676Sjpk 			return (EADDRINUSE);
14861676Sjpk 		/* FALLTHROUGH */
14870Sstevel@tonic-gate 	case IPPROTO_UDP:
14880Sstevel@tonic-gate 		up = (uint16_t *)&ports;
14890Sstevel@tonic-gate 		IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports);
14900Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
14913448Sdh155122 			connfp = &ipst->ips_ipcl_udp_fanout[
14923448Sdh155122 			    IPCL_UDP_HASH(up[1], ipst)];
14930Sstevel@tonic-gate 		} else {
14943448Sdh155122 			connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
14950Sstevel@tonic-gate 		}
14960Sstevel@tonic-gate 
14970Sstevel@tonic-gate 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) {
14980Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
14990Sstevel@tonic-gate 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
15000Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
15010Sstevel@tonic-gate 		} else {
15020Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
15030Sstevel@tonic-gate 		}
15040Sstevel@tonic-gate 		break;
15050Sstevel@tonic-gate 	}
15060Sstevel@tonic-gate 
15070Sstevel@tonic-gate 	return (ret);
15080Sstevel@tonic-gate }
15090Sstevel@tonic-gate 
15100Sstevel@tonic-gate /*
15110Sstevel@tonic-gate  * v4 packet classifying function. looks up the fanout table to
15120Sstevel@tonic-gate  * find the conn, the packet belongs to. returns the conn with
15130Sstevel@tonic-gate  * the reference held, null otherwise.
15141676Sjpk  *
15151676Sjpk  * If zoneid is ALL_ZONES, then the search rules described in the "Connection
15161676Sjpk  * Lookup" comment block are applied.  Labels are also checked as described
15171676Sjpk  * above.  If the packet is from the inside (looped back), and is from the same
15181676Sjpk  * zone, then label checks are omitted.
15190Sstevel@tonic-gate  */
15200Sstevel@tonic-gate conn_t *
15213448Sdh155122 ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid,
15223448Sdh155122     ip_stack_t *ipst)
15230Sstevel@tonic-gate {
15240Sstevel@tonic-gate 	ipha_t	*ipha;
15250Sstevel@tonic-gate 	connf_t	*connfp, *bind_connfp;
15260Sstevel@tonic-gate 	uint16_t lport;
15270Sstevel@tonic-gate 	uint16_t fport;
15280Sstevel@tonic-gate 	uint32_t ports;
15290Sstevel@tonic-gate 	conn_t	*connp;
15300Sstevel@tonic-gate 	uint16_t  *up;
15311676Sjpk 	boolean_t shared_addr;
15321676Sjpk 	boolean_t unlabeled;
15330Sstevel@tonic-gate 
15340Sstevel@tonic-gate 	ipha = (ipha_t *)mp->b_rptr;
15350Sstevel@tonic-gate 	up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET);
15360Sstevel@tonic-gate 
15370Sstevel@tonic-gate 	switch (protocol) {
15380Sstevel@tonic-gate 	case IPPROTO_TCP:
15390Sstevel@tonic-gate 		ports = *(uint32_t *)up;
15400Sstevel@tonic-gate 		connfp =
15413448Sdh155122 		    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src,
15423448Sdh155122 		    ports, ipst)];
15430Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
15440Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
15450Sstevel@tonic-gate 		    connp = connp->conn_next) {
15468432SJonathan.Anderson@Sun.COM 			if ((IPCL_CONN_MATCH(connp, protocol,
15478432SJonathan.Anderson@Sun.COM 			    ipha->ipha_src, ipha->ipha_dst, ports)) &&
15488432SJonathan.Anderson@Sun.COM 			    (IPCL_ZONE_MATCH(connp, zoneid))) {
15490Sstevel@tonic-gate 				break;
15508432SJonathan.Anderson@Sun.COM 			}
15510Sstevel@tonic-gate 		}
15520Sstevel@tonic-gate 
15530Sstevel@tonic-gate 		if (connp != NULL) {
15541676Sjpk 			/*
15551676Sjpk 			 * We have a fully-bound TCP connection.
15561676Sjpk 			 *
15571676Sjpk 			 * For labeled systems, there's no need to check the
15581676Sjpk 			 * label here.  It's known to be good as we checked
15591676Sjpk 			 * before allowing the connection to become bound.
15601676Sjpk 			 */
15610Sstevel@tonic-gate 			CONN_INC_REF(connp);
15620Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
15630Sstevel@tonic-gate 			return (connp);
15640Sstevel@tonic-gate 		}
15650Sstevel@tonic-gate 
15660Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
15670Sstevel@tonic-gate 
15680Sstevel@tonic-gate 		lport = up[1];
15691676Sjpk 		unlabeled = B_FALSE;
15701676Sjpk 		/* Cred cannot be null on IPv4 */
15711676Sjpk 		if (is_system_labeled())
15721676Sjpk 			unlabeled = (crgetlabel(DB_CRED(mp))->tsl_flags &
15731676Sjpk 			    TSLF_UNLABELED) != 0;
15741676Sjpk 		shared_addr = (zoneid == ALL_ZONES);
15751676Sjpk 		if (shared_addr) {
15763448Sdh155122 			/*
15773448Sdh155122 			 * No need to handle exclusive-stack zones since
15783448Sdh155122 			 * ALL_ZONES only applies to the shared stack.
15793448Sdh155122 			 */
15801676Sjpk 			zoneid = tsol_mlp_findzone(protocol, lport);
15811676Sjpk 			/*
15821676Sjpk 			 * If no shared MLP is found, tsol_mlp_findzone returns
15831676Sjpk 			 * ALL_ZONES.  In that case, we assume it's SLP, and
15841676Sjpk 			 * search for the zone based on the packet label.
15851676Sjpk 			 *
15861676Sjpk 			 * If there is such a zone, we prefer to find a
15871676Sjpk 			 * connection in it.  Otherwise, we look for a
15881676Sjpk 			 * MAC-exempt connection in any zone whose label
15891676Sjpk 			 * dominates the default label on the packet.
15901676Sjpk 			 */
15911676Sjpk 			if (zoneid == ALL_ZONES)
15921676Sjpk 				zoneid = tsol_packet_to_zoneid(mp);
15931676Sjpk 			else
15941676Sjpk 				unlabeled = B_FALSE;
15951676Sjpk 		}
15961676Sjpk 
15973448Sdh155122 		bind_connfp =
15983448Sdh155122 		    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
15990Sstevel@tonic-gate 		mutex_enter(&bind_connfp->connf_lock);
16000Sstevel@tonic-gate 		for (connp = bind_connfp->connf_head; connp != NULL;
16010Sstevel@tonic-gate 		    connp = connp->conn_next) {
16021676Sjpk 			if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
16032263Ssommerfe 			    lport) && (IPCL_ZONE_MATCH(connp, zoneid) ||
16041676Sjpk 			    (unlabeled && connp->conn_mac_exempt)))
16050Sstevel@tonic-gate 				break;
16060Sstevel@tonic-gate 		}
16070Sstevel@tonic-gate 
16081676Sjpk 		/*
16091676Sjpk 		 * If the matching connection is SLP on a private address, then
16101676Sjpk 		 * the label on the packet must match the local zone's label.
16111676Sjpk 		 * Otherwise, it must be in the label range defined by tnrh.
16121676Sjpk 		 * This is ensured by tsol_receive_label.
16131676Sjpk 		 */
16141676Sjpk 		if (connp != NULL && is_system_labeled() &&
16151676Sjpk 		    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
16161676Sjpk 		    shared_addr, connp)) {
16171676Sjpk 				DTRACE_PROBE3(
16181676Sjpk 				    tx__ip__log__info__classify__tcp,
16191676Sjpk 				    char *,
16201676Sjpk 				    "connp(1) could not receive mp(2)",
16211676Sjpk 				    conn_t *, connp, mblk_t *, mp);
16221676Sjpk 			connp = NULL;
16231676Sjpk 		}
16241676Sjpk 
16250Sstevel@tonic-gate 		if (connp != NULL) {
16261676Sjpk 			/* Have a listener at least */
16270Sstevel@tonic-gate 			CONN_INC_REF(connp);
16280Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
16290Sstevel@tonic-gate 			return (connp);
16300Sstevel@tonic-gate 		}
16310Sstevel@tonic-gate 
16320Sstevel@tonic-gate 		mutex_exit(&bind_connfp->connf_lock);
16330Sstevel@tonic-gate 
16340Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512,
16350Sstevel@tonic-gate 		    ("ipcl_classify: couldn't classify mp = %p\n",
16360Sstevel@tonic-gate 		    (void *)mp));
16370Sstevel@tonic-gate 		break;
16380Sstevel@tonic-gate 
16390Sstevel@tonic-gate 	case IPPROTO_UDP:
16400Sstevel@tonic-gate 		lport = up[1];
16411676Sjpk 		unlabeled = B_FALSE;
16421676Sjpk 		/* Cred cannot be null on IPv4 */
16431676Sjpk 		if (is_system_labeled())
16441676Sjpk 			unlabeled = (crgetlabel(DB_CRED(mp))->tsl_flags &
16451676Sjpk 			    TSLF_UNLABELED) != 0;
16461676Sjpk 		shared_addr = (zoneid == ALL_ZONES);
16471676Sjpk 		if (shared_addr) {
16483448Sdh155122 			/*
16493448Sdh155122 			 * No need to handle exclusive-stack zones since
16503448Sdh155122 			 * ALL_ZONES only applies to the shared stack.
16513448Sdh155122 			 */
16521676Sjpk 			zoneid = tsol_mlp_findzone(protocol, lport);
16531676Sjpk 			/*
16541676Sjpk 			 * If no shared MLP is found, tsol_mlp_findzone returns
16551676Sjpk 			 * ALL_ZONES.  In that case, we assume it's SLP, and
16561676Sjpk 			 * search for the zone based on the packet label.
16571676Sjpk 			 *
16581676Sjpk 			 * If there is such a zone, we prefer to find a
16591676Sjpk 			 * connection in it.  Otherwise, we look for a
16601676Sjpk 			 * MAC-exempt connection in any zone whose label
16611676Sjpk 			 * dominates the default label on the packet.
16621676Sjpk 			 */
16631676Sjpk 			if (zoneid == ALL_ZONES)
16641676Sjpk 				zoneid = tsol_packet_to_zoneid(mp);
16651676Sjpk 			else
16661676Sjpk 				unlabeled = B_FALSE;
16671676Sjpk 		}
16680Sstevel@tonic-gate 		fport = up[0];
16690Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512, ("ipcl_udp_classify %x %x", lport, fport));
16703448Sdh155122 		connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
16710Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
16720Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
16730Sstevel@tonic-gate 		    connp = connp->conn_next) {
16740Sstevel@tonic-gate 			if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
16750Sstevel@tonic-gate 			    fport, ipha->ipha_src) &&
16762263Ssommerfe 			    (IPCL_ZONE_MATCH(connp, zoneid) ||
16771676Sjpk 			    (unlabeled && connp->conn_mac_exempt)))
16780Sstevel@tonic-gate 				break;
16790Sstevel@tonic-gate 		}
16800Sstevel@tonic-gate 
16811676Sjpk 		if (connp != NULL && is_system_labeled() &&
16821676Sjpk 		    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
16831676Sjpk 		    shared_addr, connp)) {
16841676Sjpk 			DTRACE_PROBE3(tx__ip__log__info__classify__udp,
16851676Sjpk 			    char *, "connp(1) could not receive mp(2)",
16861676Sjpk 			    conn_t *, connp, mblk_t *, mp);
16871676Sjpk 			connp = NULL;
16881676Sjpk 		}
16891676Sjpk 
16900Sstevel@tonic-gate 		if (connp != NULL) {
16910Sstevel@tonic-gate 			CONN_INC_REF(connp);
16920Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
16930Sstevel@tonic-gate 			return (connp);
16940Sstevel@tonic-gate 		}
16950Sstevel@tonic-gate 
16960Sstevel@tonic-gate 		/*
16970Sstevel@tonic-gate 		 * We shouldn't come here for multicast/broadcast packets
16980Sstevel@tonic-gate 		 */
16990Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
17000Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512,
17010Sstevel@tonic-gate 		    ("ipcl_classify: cant find udp conn_t for ports : %x %x",
17020Sstevel@tonic-gate 		    lport, fport));
17030Sstevel@tonic-gate 		break;
17040Sstevel@tonic-gate 	}
17050Sstevel@tonic-gate 
17060Sstevel@tonic-gate 	return (NULL);
17070Sstevel@tonic-gate }
17080Sstevel@tonic-gate 
17090Sstevel@tonic-gate conn_t *
17103448Sdh155122 ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid,
17113448Sdh155122     ip_stack_t *ipst)
17120Sstevel@tonic-gate {
17130Sstevel@tonic-gate 	ip6_t		*ip6h;
17140Sstevel@tonic-gate 	connf_t		*connfp, *bind_connfp;
17150Sstevel@tonic-gate 	uint16_t	lport;
17160Sstevel@tonic-gate 	uint16_t	fport;
17170Sstevel@tonic-gate 	tcph_t		*tcph;
17180Sstevel@tonic-gate 	uint32_t	ports;
17190Sstevel@tonic-gate 	conn_t		*connp;
17200Sstevel@tonic-gate 	uint16_t	*up;
17211676Sjpk 	boolean_t	shared_addr;
17221676Sjpk 	boolean_t	unlabeled;
17230Sstevel@tonic-gate 
17240Sstevel@tonic-gate 	ip6h = (ip6_t *)mp->b_rptr;
17250Sstevel@tonic-gate 
17260Sstevel@tonic-gate 	switch (protocol) {
17270Sstevel@tonic-gate 	case IPPROTO_TCP:
17280Sstevel@tonic-gate 		tcph = (tcph_t *)&mp->b_rptr[hdr_len];
17290Sstevel@tonic-gate 		up = (uint16_t *)tcph->th_lport;
17300Sstevel@tonic-gate 		ports = *(uint32_t *)up;
17310Sstevel@tonic-gate 
17320Sstevel@tonic-gate 		connfp =
17333448Sdh155122 		    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src,
17343448Sdh155122 		    ports, ipst)];
17350Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
17360Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
17370Sstevel@tonic-gate 		    connp = connp->conn_next) {
17388432SJonathan.Anderson@Sun.COM 			if ((IPCL_CONN_MATCH_V6(connp, protocol,
17398432SJonathan.Anderson@Sun.COM 			    ip6h->ip6_src, ip6h->ip6_dst, ports)) &&
17408432SJonathan.Anderson@Sun.COM 			    (IPCL_ZONE_MATCH(connp, zoneid))) {
17410Sstevel@tonic-gate 				break;
17428432SJonathan.Anderson@Sun.COM 			}
17430Sstevel@tonic-gate 		}
17440Sstevel@tonic-gate 
17450Sstevel@tonic-gate 		if (connp != NULL) {
17461676Sjpk 			/*
17471676Sjpk 			 * We have a fully-bound TCP connection.
17481676Sjpk 			 *
17491676Sjpk 			 * For labeled systems, there's no need to check the
17501676Sjpk 			 * label here.  It's known to be good as we checked
17511676Sjpk 			 * before allowing the connection to become bound.
17521676Sjpk 			 */
17530Sstevel@tonic-gate 			CONN_INC_REF(connp);
17540Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
17550Sstevel@tonic-gate 			return (connp);
17560Sstevel@tonic-gate 		}
17570Sstevel@tonic-gate 
17580Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
17590Sstevel@tonic-gate 
17600Sstevel@tonic-gate 		lport = up[1];
17611676Sjpk 		unlabeled = B_FALSE;
17621676Sjpk 		/* Cred can be null on IPv6 */
17631676Sjpk 		if (is_system_labeled()) {
17641676Sjpk 			cred_t *cr = DB_CRED(mp);
17651676Sjpk 
17661676Sjpk 			unlabeled = (cr != NULL &&
17671676Sjpk 			    crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0;
17681676Sjpk 		}
17691676Sjpk 		shared_addr = (zoneid == ALL_ZONES);
17701676Sjpk 		if (shared_addr) {
17713448Sdh155122 			/*
17723448Sdh155122 			 * No need to handle exclusive-stack zones since
17733448Sdh155122 			 * ALL_ZONES only applies to the shared stack.
17743448Sdh155122 			 */
17751676Sjpk 			zoneid = tsol_mlp_findzone(protocol, lport);
17761676Sjpk 			/*
17771676Sjpk 			 * If no shared MLP is found, tsol_mlp_findzone returns
17781676Sjpk 			 * ALL_ZONES.  In that case, we assume it's SLP, and
17791676Sjpk 			 * search for the zone based on the packet label.
17801676Sjpk 			 *
17811676Sjpk 			 * If there is such a zone, we prefer to find a
17821676Sjpk 			 * connection in it.  Otherwise, we look for a
17831676Sjpk 			 * MAC-exempt connection in any zone whose label
17841676Sjpk 			 * dominates the default label on the packet.
17851676Sjpk 			 */
17861676Sjpk 			if (zoneid == ALL_ZONES)
17871676Sjpk 				zoneid = tsol_packet_to_zoneid(mp);
17881676Sjpk 			else
17891676Sjpk 				unlabeled = B_FALSE;
17901676Sjpk 		}
17911676Sjpk 
17923448Sdh155122 		bind_connfp =
17933448Sdh155122 		    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
17940Sstevel@tonic-gate 		mutex_enter(&bind_connfp->connf_lock);
17950Sstevel@tonic-gate 		for (connp = bind_connfp->connf_head; connp != NULL;
17960Sstevel@tonic-gate 		    connp = connp->conn_next) {
17970Sstevel@tonic-gate 			if (IPCL_BIND_MATCH_V6(connp, protocol,
17980Sstevel@tonic-gate 			    ip6h->ip6_dst, lport) &&
17992263Ssommerfe 			    (IPCL_ZONE_MATCH(connp, zoneid) ||
18001676Sjpk 			    (unlabeled && connp->conn_mac_exempt)))
18010Sstevel@tonic-gate 				break;
18020Sstevel@tonic-gate 		}
18030Sstevel@tonic-gate 
18041676Sjpk 		if (connp != NULL && is_system_labeled() &&
18051676Sjpk 		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
18061676Sjpk 		    shared_addr, connp)) {
18071676Sjpk 			DTRACE_PROBE3(tx__ip__log__info__classify__tcp6,
18081676Sjpk 			    char *, "connp(1) could not receive mp(2)",
18091676Sjpk 			    conn_t *, connp, mblk_t *, mp);
18101676Sjpk 			connp = NULL;
18111676Sjpk 		}
18121676Sjpk 
18130Sstevel@tonic-gate 		if (connp != NULL) {
18140Sstevel@tonic-gate 			/* Have a listner at least */
18150Sstevel@tonic-gate 			CONN_INC_REF(connp);
18160Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
18170Sstevel@tonic-gate 			IPCL_DEBUG_LVL(512,
18180Sstevel@tonic-gate 			    ("ipcl_classify_v6: found listner "
18190Sstevel@tonic-gate 			    "connp = %p\n", (void *)connp));
18200Sstevel@tonic-gate 
18210Sstevel@tonic-gate 			return (connp);
18220Sstevel@tonic-gate 		}
18230Sstevel@tonic-gate 
18240Sstevel@tonic-gate 		mutex_exit(&bind_connfp->connf_lock);
18250Sstevel@tonic-gate 
18260Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512,
18270Sstevel@tonic-gate 		    ("ipcl_classify_v6: couldn't classify mp = %p\n",
18280Sstevel@tonic-gate 		    (void *)mp));
18290Sstevel@tonic-gate 		break;
18300Sstevel@tonic-gate 
18310Sstevel@tonic-gate 	case IPPROTO_UDP:
18320Sstevel@tonic-gate 		up = (uint16_t *)&mp->b_rptr[hdr_len];
18330Sstevel@tonic-gate 		lport = up[1];
18341676Sjpk 		unlabeled = B_FALSE;
18351676Sjpk 		/* Cred can be null on IPv6 */
18361676Sjpk 		if (is_system_labeled()) {
18371676Sjpk 			cred_t *cr = DB_CRED(mp);
18381676Sjpk 
18391676Sjpk 			unlabeled = (cr != NULL &&
18401676Sjpk 			    crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0;
18411676Sjpk 		}
18421676Sjpk 		shared_addr = (zoneid == ALL_ZONES);
18431676Sjpk 		if (shared_addr) {
18443448Sdh155122 			/*
18453448Sdh155122 			 * No need to handle exclusive-stack zones since
18463448Sdh155122 			 * ALL_ZONES only applies to the shared stack.
18473448Sdh155122 			 */
18481676Sjpk 			zoneid = tsol_mlp_findzone(protocol, lport);
18491676Sjpk 			/*
18501676Sjpk 			 * If no shared MLP is found, tsol_mlp_findzone returns
18511676Sjpk 			 * ALL_ZONES.  In that case, we assume it's SLP, and
18521676Sjpk 			 * search for the zone based on the packet label.
18531676Sjpk 			 *
18541676Sjpk 			 * If there is such a zone, we prefer to find a
18551676Sjpk 			 * connection in it.  Otherwise, we look for a
18561676Sjpk 			 * MAC-exempt connection in any zone whose label
18571676Sjpk 			 * dominates the default label on the packet.
18581676Sjpk 			 */
18591676Sjpk 			if (zoneid == ALL_ZONES)
18601676Sjpk 				zoneid = tsol_packet_to_zoneid(mp);
18611676Sjpk 			else
18621676Sjpk 				unlabeled = B_FALSE;
18631676Sjpk 		}
18641676Sjpk 
18650Sstevel@tonic-gate 		fport = up[0];
18660Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512, ("ipcl_udp_classify_v6 %x %x", lport,
18670Sstevel@tonic-gate 		    fport));
18683448Sdh155122 		connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
18690Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
18700Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
18710Sstevel@tonic-gate 		    connp = connp->conn_next) {
18720Sstevel@tonic-gate 			if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst,
18730Sstevel@tonic-gate 			    fport, ip6h->ip6_src) &&
18742263Ssommerfe 			    (IPCL_ZONE_MATCH(connp, zoneid) ||
18751676Sjpk 			    (unlabeled && connp->conn_mac_exempt)))
18760Sstevel@tonic-gate 				break;
18770Sstevel@tonic-gate 		}
18780Sstevel@tonic-gate 
18791676Sjpk 		if (connp != NULL && is_system_labeled() &&
18801676Sjpk 		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
18811676Sjpk 		    shared_addr, connp)) {
18821676Sjpk 			DTRACE_PROBE3(tx__ip__log__info__classify__udp6,
18831676Sjpk 			    char *, "connp(1) could not receive mp(2)",
18841676Sjpk 			    conn_t *, connp, mblk_t *, mp);
18851676Sjpk 			connp = NULL;
18861676Sjpk 		}
18871676Sjpk 
18880Sstevel@tonic-gate 		if (connp != NULL) {
18890Sstevel@tonic-gate 			CONN_INC_REF(connp);
18900Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
18910Sstevel@tonic-gate 			return (connp);
18920Sstevel@tonic-gate 		}
18930Sstevel@tonic-gate 
18940Sstevel@tonic-gate 		/*
18950Sstevel@tonic-gate 		 * We shouldn't come here for multicast/broadcast packets
18960Sstevel@tonic-gate 		 */
18970Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
18980Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512,
18990Sstevel@tonic-gate 		    ("ipcl_classify_v6: cant find udp conn_t for ports : %x %x",
19000Sstevel@tonic-gate 		    lport, fport));
19010Sstevel@tonic-gate 		break;
19020Sstevel@tonic-gate 	}
19030Sstevel@tonic-gate 
19040Sstevel@tonic-gate 	return (NULL);
19050Sstevel@tonic-gate }
19060Sstevel@tonic-gate 
19070Sstevel@tonic-gate /*
19080Sstevel@tonic-gate  * wrapper around ipcl_classify_(v4,v6) routines.
19090Sstevel@tonic-gate  */
19100Sstevel@tonic-gate conn_t *
19113448Sdh155122 ipcl_classify(mblk_t *mp, zoneid_t zoneid, ip_stack_t *ipst)
19120Sstevel@tonic-gate {
19130Sstevel@tonic-gate 	uint16_t	hdr_len;
19140Sstevel@tonic-gate 	ipha_t		*ipha;
19150Sstevel@tonic-gate 	uint8_t		*nexthdrp;
19160Sstevel@tonic-gate 
19170Sstevel@tonic-gate 	if (MBLKL(mp) < sizeof (ipha_t))
19180Sstevel@tonic-gate 		return (NULL);
19190Sstevel@tonic-gate 
19200Sstevel@tonic-gate 	switch (IPH_HDR_VERSION(mp->b_rptr)) {
19210Sstevel@tonic-gate 	case IPV4_VERSION:
19220Sstevel@tonic-gate 		ipha = (ipha_t *)mp->b_rptr;
19230Sstevel@tonic-gate 		hdr_len = IPH_HDR_LENGTH(ipha);
19240Sstevel@tonic-gate 		return (ipcl_classify_v4(mp, ipha->ipha_protocol, hdr_len,
19253448Sdh155122 		    zoneid, ipst));
19260Sstevel@tonic-gate 	case IPV6_VERSION:
19270Sstevel@tonic-gate 		if (!ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr,
19280Sstevel@tonic-gate 		    &hdr_len, &nexthdrp))
19290Sstevel@tonic-gate 			return (NULL);
19300Sstevel@tonic-gate 
19313448Sdh155122 		return (ipcl_classify_v6(mp, *nexthdrp, hdr_len, zoneid, ipst));
19320Sstevel@tonic-gate 	}
19330Sstevel@tonic-gate 
19340Sstevel@tonic-gate 	return (NULL);
19350Sstevel@tonic-gate }
19360Sstevel@tonic-gate 
19370Sstevel@tonic-gate conn_t *
19381676Sjpk ipcl_classify_raw(mblk_t *mp, uint8_t protocol, zoneid_t zoneid,
19393448Sdh155122     uint32_t ports, ipha_t *hdr, ip_stack_t *ipst)
19400Sstevel@tonic-gate {
19411676Sjpk 	connf_t		*connfp;
19420Sstevel@tonic-gate 	conn_t		*connp;
19430Sstevel@tonic-gate 	in_port_t	lport;
19440Sstevel@tonic-gate 	int		af;
19451676Sjpk 	boolean_t	shared_addr;
19461676Sjpk 	boolean_t	unlabeled;
19471676Sjpk 	const void	*dst;
19480Sstevel@tonic-gate 
19490Sstevel@tonic-gate 	lport = ((uint16_t *)&ports)[1];
19501676Sjpk 
19511676Sjpk 	unlabeled = B_FALSE;
19521676Sjpk 	/* Cred can be null on IPv6 */
19531676Sjpk 	if (is_system_labeled()) {
19541676Sjpk 		cred_t *cr = DB_CRED(mp);
19551676Sjpk 
19561676Sjpk 		unlabeled = (cr != NULL &&
19571676Sjpk 		    crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0;
19581676Sjpk 	}
19591676Sjpk 	shared_addr = (zoneid == ALL_ZONES);
19601676Sjpk 	if (shared_addr) {
19613448Sdh155122 		/*
19623448Sdh155122 		 * No need to handle exclusive-stack zones since ALL_ZONES
19633448Sdh155122 		 * only applies to the shared stack.
19643448Sdh155122 		 */
19651676Sjpk 		zoneid = tsol_mlp_findzone(protocol, lport);
19661676Sjpk 		/*
19671676Sjpk 		 * If no shared MLP is found, tsol_mlp_findzone returns
19681676Sjpk 		 * ALL_ZONES.  In that case, we assume it's SLP, and search for
19691676Sjpk 		 * the zone based on the packet label.
19701676Sjpk 		 *
19711676Sjpk 		 * If there is such a zone, we prefer to find a connection in
19721676Sjpk 		 * it.  Otherwise, we look for a MAC-exempt connection in any
19731676Sjpk 		 * zone whose label dominates the default label on the packet.
19741676Sjpk 		 */
19751676Sjpk 		if (zoneid == ALL_ZONES)
19761676Sjpk 			zoneid = tsol_packet_to_zoneid(mp);
19771676Sjpk 		else
19781676Sjpk 			unlabeled = B_FALSE;
19791676Sjpk 	}
19801676Sjpk 
19810Sstevel@tonic-gate 	af = IPH_HDR_VERSION(hdr);
19821676Sjpk 	dst = af == IPV4_VERSION ? (const void *)&hdr->ipha_dst :
19831676Sjpk 	    (const void *)&((ip6_t *)hdr)->ip6_dst;
19843448Sdh155122 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
19850Sstevel@tonic-gate 
19860Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
19870Sstevel@tonic-gate 	for (connp = connfp->connf_head; connp != NULL;
19880Sstevel@tonic-gate 	    connp = connp->conn_next) {
19890Sstevel@tonic-gate 		/* We don't allow v4 fallback for v6 raw socket. */
19901676Sjpk 		if (af == (connp->conn_af_isv6 ? IPV4_VERSION :
19911676Sjpk 		    IPV6_VERSION))
19920Sstevel@tonic-gate 			continue;
19930Sstevel@tonic-gate 		if (connp->conn_fully_bound) {
19940Sstevel@tonic-gate 			if (af == IPV4_VERSION) {
19951676Sjpk 				if (!IPCL_CONN_MATCH(connp, protocol,
19961676Sjpk 				    hdr->ipha_src, hdr->ipha_dst, ports))
19971676Sjpk 					continue;
19980Sstevel@tonic-gate 			} else {
19991676Sjpk 				if (!IPCL_CONN_MATCH_V6(connp, protocol,
20000Sstevel@tonic-gate 				    ((ip6_t *)hdr)->ip6_src,
20011676Sjpk 				    ((ip6_t *)hdr)->ip6_dst, ports))
20021676Sjpk 					continue;
20030Sstevel@tonic-gate 			}
20040Sstevel@tonic-gate 		} else {
20050Sstevel@tonic-gate 			if (af == IPV4_VERSION) {
20061676Sjpk 				if (!IPCL_BIND_MATCH(connp, protocol,
20071676Sjpk 				    hdr->ipha_dst, lport))
20081676Sjpk 					continue;
20090Sstevel@tonic-gate 			} else {
20101676Sjpk 				if (!IPCL_BIND_MATCH_V6(connp, protocol,
20111676Sjpk 				    ((ip6_t *)hdr)->ip6_dst, lport))
20121676Sjpk 					continue;
20130Sstevel@tonic-gate 			}
20140Sstevel@tonic-gate 		}
20151676Sjpk 
20162263Ssommerfe 		if (IPCL_ZONE_MATCH(connp, zoneid) ||
20171676Sjpk 		    (unlabeled && connp->conn_mac_exempt))
20181676Sjpk 			break;
20191676Sjpk 	}
20201676Sjpk 	/*
20211676Sjpk 	 * If the connection is fully-bound and connection-oriented (TCP or
20221676Sjpk 	 * SCTP), then we've already validated the remote system's label.
20231676Sjpk 	 * There's no need to do it again for every packet.
20241676Sjpk 	 */
20251676Sjpk 	if (connp != NULL && is_system_labeled() && (!connp->conn_fully_bound ||
20261676Sjpk 	    !(connp->conn_flags & (IPCL_TCP|IPCL_SCTPCONN))) &&
20271676Sjpk 	    !tsol_receive_local(mp, dst, af, shared_addr, connp)) {
20281676Sjpk 		DTRACE_PROBE3(tx__ip__log__info__classify__rawip,
20291676Sjpk 		    char *, "connp(1) could not receive mp(2)",
20301676Sjpk 		    conn_t *, connp, mblk_t *, mp);
20311676Sjpk 		connp = NULL;
20320Sstevel@tonic-gate 	}
2033409Skcpoon 
2034409Skcpoon 	if (connp != NULL)
2035409Skcpoon 		goto found;
2036409Skcpoon 	mutex_exit(&connfp->connf_lock);
2037409Skcpoon 
2038409Skcpoon 	/* Try to look for a wildcard match. */
20393448Sdh155122 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)];
2040409Skcpoon 	mutex_enter(&connfp->connf_lock);
2041409Skcpoon 	for (connp = connfp->connf_head; connp != NULL;
2042409Skcpoon 	    connp = connp->conn_next) {
2043409Skcpoon 		/* We don't allow v4 fallback for v6 raw socket. */
2044409Skcpoon 		if ((af == (connp->conn_af_isv6 ? IPV4_VERSION :
20452263Ssommerfe 		    IPV6_VERSION)) || !IPCL_ZONE_MATCH(connp, zoneid)) {
2046409Skcpoon 			continue;
2047409Skcpoon 		}
2048409Skcpoon 		if (af == IPV4_VERSION) {
2049409Skcpoon 			if (IPCL_RAW_MATCH(connp, protocol, hdr->ipha_dst))
2050409Skcpoon 				break;
2051409Skcpoon 		} else {
2052409Skcpoon 			if (IPCL_RAW_MATCH_V6(connp, protocol,
2053409Skcpoon 			    ((ip6_t *)hdr)->ip6_dst)) {
2054409Skcpoon 				break;
2055409Skcpoon 			}
2056409Skcpoon 		}
20570Sstevel@tonic-gate 	}
2058409Skcpoon 
2059409Skcpoon 	if (connp != NULL)
2060409Skcpoon 		goto found;
2061409Skcpoon 
20620Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
20630Sstevel@tonic-gate 	return (NULL);
2064409Skcpoon 
2065409Skcpoon found:
2066409Skcpoon 	ASSERT(connp != NULL);
2067409Skcpoon 	CONN_INC_REF(connp);
2068409Skcpoon 	mutex_exit(&connfp->connf_lock);
2069409Skcpoon 	return (connp);
20700Sstevel@tonic-gate }
20710Sstevel@tonic-gate 
20720Sstevel@tonic-gate /* ARGSUSED */
20730Sstevel@tonic-gate static int
20745240Snordmark tcp_conn_constructor(void *buf, void *cdrarg, int kmflags)
20750Sstevel@tonic-gate {
20760Sstevel@tonic-gate 	itc_t	*itc = (itc_t *)buf;
20770Sstevel@tonic-gate 	conn_t 	*connp = &itc->itc_conn;
20785240Snordmark 	tcp_t	*tcp = (tcp_t *)&itc[1];
20795240Snordmark 
20805240Snordmark 	bzero(connp, sizeof (conn_t));
20815240Snordmark 	bzero(tcp, sizeof (tcp_t));
20825240Snordmark 
20835240Snordmark 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
20845240Snordmark 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
20858348SEric.Yu@Sun.COM 	cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL);
20860Sstevel@tonic-gate 	tcp->tcp_timercache = tcp_timermp_alloc(KM_NOSLEEP);
20870Sstevel@tonic-gate 	connp->conn_tcp = tcp;
20880Sstevel@tonic-gate 	connp->conn_flags = IPCL_TCPCONN;
20890Sstevel@tonic-gate 	connp->conn_ulp = IPPROTO_TCP;
20900Sstevel@tonic-gate 	tcp->tcp_connp = connp;
20910Sstevel@tonic-gate 	return (0);
20920Sstevel@tonic-gate }
20930Sstevel@tonic-gate 
20940Sstevel@tonic-gate /* ARGSUSED */
20950Sstevel@tonic-gate static void
20965240Snordmark tcp_conn_destructor(void *buf, void *cdrarg)
20975240Snordmark {
20985240Snordmark 	itc_t	*itc = (itc_t *)buf;
20995240Snordmark 	conn_t 	*connp = &itc->itc_conn;
21005240Snordmark 	tcp_t	*tcp = (tcp_t *)&itc[1];
21015240Snordmark 
21025240Snordmark 	ASSERT(connp->conn_flags & IPCL_TCPCONN);
21035240Snordmark 	ASSERT(tcp->tcp_connp == connp);
21045240Snordmark 	ASSERT(connp->conn_tcp == tcp);
21055240Snordmark 	tcp_timermp_free(tcp);
21065240Snordmark 	mutex_destroy(&connp->conn_lock);
21075240Snordmark 	cv_destroy(&connp->conn_cv);
21088348SEric.Yu@Sun.COM 	cv_destroy(&connp->conn_sq_cv);
21095240Snordmark }
21105240Snordmark 
21115240Snordmark /* ARGSUSED */
21125240Snordmark static int
21135240Snordmark ip_conn_constructor(void *buf, void *cdrarg, int kmflags)
21145240Snordmark {
21155240Snordmark 	itc_t	*itc = (itc_t *)buf;
21165240Snordmark 	conn_t 	*connp = &itc->itc_conn;
21175240Snordmark 
21185240Snordmark 	bzero(connp, sizeof (conn_t));
21195240Snordmark 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
21205240Snordmark 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
21215240Snordmark 	connp->conn_flags = IPCL_IPCCONN;
21225240Snordmark 
21235240Snordmark 	return (0);
21245240Snordmark }
21255240Snordmark 
21265240Snordmark /* ARGSUSED */
21275240Snordmark static void
21285240Snordmark ip_conn_destructor(void *buf, void *cdrarg)
21295240Snordmark {
21305240Snordmark 	itc_t	*itc = (itc_t *)buf;
21315240Snordmark 	conn_t 	*connp = &itc->itc_conn;
21325240Snordmark 
21335240Snordmark 	ASSERT(connp->conn_flags & IPCL_IPCCONN);
21345240Snordmark 	ASSERT(connp->conn_priv == NULL);
21355240Snordmark 	mutex_destroy(&connp->conn_lock);
21365240Snordmark 	cv_destroy(&connp->conn_cv);
21375240Snordmark }
21385240Snordmark 
21395240Snordmark /* ARGSUSED */
21405240Snordmark static int
21415240Snordmark udp_conn_constructor(void *buf, void *cdrarg, int kmflags)
21425240Snordmark {
21435240Snordmark 	itc_t	*itc = (itc_t *)buf;
21445240Snordmark 	conn_t 	*connp = &itc->itc_conn;
21455240Snordmark 	udp_t	*udp = (udp_t *)&itc[1];
21465240Snordmark 
21475240Snordmark 	bzero(connp, sizeof (conn_t));
21485240Snordmark 	bzero(udp, sizeof (udp_t));
21495240Snordmark 
21505240Snordmark 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
21515240Snordmark 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
21525240Snordmark 	connp->conn_udp = udp;
21535240Snordmark 	connp->conn_flags = IPCL_UDPCONN;
21545240Snordmark 	connp->conn_ulp = IPPROTO_UDP;
21555240Snordmark 	udp->udp_connp = connp;
21565240Snordmark 	return (0);
21575240Snordmark }
21585240Snordmark 
21595240Snordmark /* ARGSUSED */
21605240Snordmark static void
21615240Snordmark udp_conn_destructor(void *buf, void *cdrarg)
21625240Snordmark {
21635240Snordmark 	itc_t	*itc = (itc_t *)buf;
21645240Snordmark 	conn_t 	*connp = &itc->itc_conn;
21655240Snordmark 	udp_t	*udp = (udp_t *)&itc[1];
21665240Snordmark 
21675240Snordmark 	ASSERT(connp->conn_flags & IPCL_UDPCONN);
21685240Snordmark 	ASSERT(udp->udp_connp == connp);
21695240Snordmark 	ASSERT(connp->conn_udp == udp);
21705240Snordmark 	mutex_destroy(&connp->conn_lock);
21715240Snordmark 	cv_destroy(&connp->conn_cv);
21725240Snordmark }
21735240Snordmark 
21745240Snordmark /* ARGSUSED */
21755240Snordmark static int
21765240Snordmark rawip_conn_constructor(void *buf, void *cdrarg, int kmflags)
21770Sstevel@tonic-gate {
21785240Snordmark 	itc_t	*itc = (itc_t *)buf;
21795240Snordmark 	conn_t 	*connp = &itc->itc_conn;
21805240Snordmark 	icmp_t	*icmp = (icmp_t *)&itc[1];
21815240Snordmark 
21825240Snordmark 	bzero(connp, sizeof (conn_t));
21835240Snordmark 	bzero(icmp, sizeof (icmp_t));
21845240Snordmark 
21855240Snordmark 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
21865240Snordmark 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
21875240Snordmark 	connp->conn_icmp = icmp;
21885240Snordmark 	connp->conn_flags = IPCL_RAWIPCONN;
21895240Snordmark 	connp->conn_ulp = IPPROTO_ICMP;
21905240Snordmark 	icmp->icmp_connp = connp;
21915240Snordmark 	return (0);
21925240Snordmark }
21935240Snordmark 
21945240Snordmark /* ARGSUSED */
21955240Snordmark static void
21965240Snordmark rawip_conn_destructor(void *buf, void *cdrarg)
21975240Snordmark {
21985240Snordmark 	itc_t	*itc = (itc_t *)buf;
21995240Snordmark 	conn_t 	*connp = &itc->itc_conn;
22005240Snordmark 	icmp_t	*icmp = (icmp_t *)&itc[1];
22015240Snordmark 
22025240Snordmark 	ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
22035240Snordmark 	ASSERT(icmp->icmp_connp == connp);
22045240Snordmark 	ASSERT(connp->conn_icmp == icmp);
22055240Snordmark 	mutex_destroy(&connp->conn_lock);
22065240Snordmark 	cv_destroy(&connp->conn_cv);
22075240Snordmark }
22085240Snordmark 
22095240Snordmark /* ARGSUSED */
22105240Snordmark static int
22115240Snordmark rts_conn_constructor(void *buf, void *cdrarg, int kmflags)
22125240Snordmark {
22135240Snordmark 	itc_t	*itc = (itc_t *)buf;
22145240Snordmark 	conn_t 	*connp = &itc->itc_conn;
22155240Snordmark 	rts_t	*rts = (rts_t *)&itc[1];
22165240Snordmark 
22175240Snordmark 	bzero(connp, sizeof (conn_t));
22185240Snordmark 	bzero(rts, sizeof (rts_t));
22195240Snordmark 
22205240Snordmark 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
22215240Snordmark 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
22225240Snordmark 	connp->conn_rts = rts;
22235240Snordmark 	connp->conn_flags = IPCL_RTSCONN;
22245240Snordmark 	rts->rts_connp = connp;
22255240Snordmark 	return (0);
22265240Snordmark }
22275240Snordmark 
22285240Snordmark /* ARGSUSED */
22295240Snordmark static void
22305240Snordmark rts_conn_destructor(void *buf, void *cdrarg)
22315240Snordmark {
22325240Snordmark 	itc_t	*itc = (itc_t *)buf;
22335240Snordmark 	conn_t 	*connp = &itc->itc_conn;
22345240Snordmark 	rts_t	*rts = (rts_t *)&itc[1];
22355240Snordmark 
22365240Snordmark 	ASSERT(connp->conn_flags & IPCL_RTSCONN);
22375240Snordmark 	ASSERT(rts->rts_connp == connp);
22385240Snordmark 	ASSERT(connp->conn_rts == rts);
22395240Snordmark 	mutex_destroy(&connp->conn_lock);
22405240Snordmark 	cv_destroy(&connp->conn_cv);
22415240Snordmark }
22425240Snordmark 
22438348SEric.Yu@Sun.COM /* ARGSUSED */
22448348SEric.Yu@Sun.COM int
22458348SEric.Yu@Sun.COM ip_helper_stream_constructor(void *buf, void *cdrarg, int kmflags)
22468348SEric.Yu@Sun.COM {
22478348SEric.Yu@Sun.COM 	int error;
22488348SEric.Yu@Sun.COM 	netstack_t	*ns;
22498348SEric.Yu@Sun.COM 	int		ret;
22508348SEric.Yu@Sun.COM 	tcp_stack_t	*tcps;
22518348SEric.Yu@Sun.COM 	ip_helper_stream_info_t	*ip_helper_str;
22528348SEric.Yu@Sun.COM 	ip_stack_t	*ipst;
22538348SEric.Yu@Sun.COM 
22548348SEric.Yu@Sun.COM 	ns = netstack_find_by_cred(kcred);
22558348SEric.Yu@Sun.COM 	ASSERT(ns != NULL);
22568348SEric.Yu@Sun.COM 	tcps = ns->netstack_tcp;
22578348SEric.Yu@Sun.COM 	ipst = ns->netstack_ip;
22588348SEric.Yu@Sun.COM 	ASSERT(tcps != NULL);
22598348SEric.Yu@Sun.COM 	ip_helper_str = (ip_helper_stream_info_t *)buf;
22608348SEric.Yu@Sun.COM 
22618444SRao.Shoaib@Sun.COM 	do {
22628444SRao.Shoaib@Sun.COM 		error = ldi_open_by_name(DEV_IP, IP_HELPER_STR, kcred,
22638444SRao.Shoaib@Sun.COM 		    &ip_helper_str->iphs_handle, ipst->ips_ldi_ident);
22648444SRao.Shoaib@Sun.COM 	} while (error == EINTR);
22658444SRao.Shoaib@Sun.COM 
22668444SRao.Shoaib@Sun.COM 	if (error == 0) {
22678444SRao.Shoaib@Sun.COM 		do {
22688444SRao.Shoaib@Sun.COM 			error = ldi_ioctl(
22698444SRao.Shoaib@Sun.COM 			    ip_helper_str->iphs_handle, SIOCSQPTR,
22708444SRao.Shoaib@Sun.COM 			    (intptr_t)buf, FKIOCTL, kcred, &ret);
22718444SRao.Shoaib@Sun.COM 		} while (error == EINTR);
22728444SRao.Shoaib@Sun.COM 
22738444SRao.Shoaib@Sun.COM 		if (error != 0) {
22748444SRao.Shoaib@Sun.COM 			(void) ldi_close(
22758444SRao.Shoaib@Sun.COM 			    ip_helper_str->iphs_handle, 0, kcred);
22768444SRao.Shoaib@Sun.COM 		}
22778348SEric.Yu@Sun.COM 	}
22788444SRao.Shoaib@Sun.COM 
22798348SEric.Yu@Sun.COM 	netstack_rele(ipst->ips_netstack);
22808444SRao.Shoaib@Sun.COM 
22818348SEric.Yu@Sun.COM 	return (error);
22828348SEric.Yu@Sun.COM }
22838348SEric.Yu@Sun.COM 
22848348SEric.Yu@Sun.COM /* ARGSUSED */
22858348SEric.Yu@Sun.COM static void
22868348SEric.Yu@Sun.COM ip_helper_stream_destructor(void *buf, void *cdrarg)
22878348SEric.Yu@Sun.COM {
22888348SEric.Yu@Sun.COM 	ip_helper_stream_info_t *ip_helper_str = (ip_helper_stream_info_t *)buf;
22898348SEric.Yu@Sun.COM 
22908444SRao.Shoaib@Sun.COM 	ip_helper_str->iphs_rq->q_ptr =
22918444SRao.Shoaib@Sun.COM 	    ip_helper_str->iphs_wq->q_ptr =
22928444SRao.Shoaib@Sun.COM 	    ip_helper_str->iphs_minfo;
22938444SRao.Shoaib@Sun.COM 	(void) ldi_close(ip_helper_str->iphs_handle, 0, kcred);
22948348SEric.Yu@Sun.COM }
22958348SEric.Yu@Sun.COM 
22968348SEric.Yu@Sun.COM 
22975240Snordmark /*
22985240Snordmark  * Called as part of ipcl_conn_destroy to assert and clear any pointers
22995240Snordmark  * in the conn_t.
23005240Snordmark  */
23015240Snordmark void
23025240Snordmark ipcl_conn_cleanup(conn_t *connp)
23035240Snordmark {
23045240Snordmark 	ASSERT(connp->conn_ire_cache == NULL);
23055240Snordmark 	ASSERT(connp->conn_latch == NULL);
23065240Snordmark #ifdef notdef
23075240Snordmark 	ASSERT(connp->conn_rq == NULL);
23085240Snordmark 	ASSERT(connp->conn_wq == NULL);
23095240Snordmark #endif
23105240Snordmark 	ASSERT(connp->conn_cred == NULL);
23115240Snordmark 	ASSERT(connp->conn_g_fanout == NULL);
23125240Snordmark 	ASSERT(connp->conn_g_next == NULL);
23135240Snordmark 	ASSERT(connp->conn_g_prev == NULL);
23145240Snordmark 	ASSERT(connp->conn_policy == NULL);
23155240Snordmark 	ASSERT(connp->conn_fanout == NULL);
23165240Snordmark 	ASSERT(connp->conn_next == NULL);
23175240Snordmark 	ASSERT(connp->conn_prev == NULL);
23185240Snordmark #ifdef notdef
23195240Snordmark 	/*
23205240Snordmark 	 * The ill and ipif pointers are not cleared before the conn_t
23215240Snordmark 	 * goes away since they do not hold a reference on the ill/ipif.
23225240Snordmark 	 * We should replace these pointers with ifindex/ipaddr_t to
23235240Snordmark 	 * make the code less complex.
23245240Snordmark 	 */
23255240Snordmark 	ASSERT(connp->conn_outgoing_ill == NULL);
23265240Snordmark 	ASSERT(connp->conn_incoming_ill == NULL);
23275240Snordmark 	ASSERT(connp->conn_multicast_ipif == NULL);
23285240Snordmark 	ASSERT(connp->conn_multicast_ill == NULL);
23295240Snordmark #endif
23305240Snordmark 	ASSERT(connp->conn_oper_pending_ill == NULL);
23315240Snordmark 	ASSERT(connp->conn_ilg == NULL);
23325240Snordmark 	ASSERT(connp->conn_drain_next == NULL);
23335240Snordmark 	ASSERT(connp->conn_drain_prev == NULL);
23345277Snordmark #ifdef notdef
23355277Snordmark 	/* conn_idl is not cleared when removed from idl list */
23365240Snordmark 	ASSERT(connp->conn_idl == NULL);
23375277Snordmark #endif
23385240Snordmark 	ASSERT(connp->conn_ipsec_opt_mp == NULL);
23395240Snordmark 	ASSERT(connp->conn_peercred == NULL);
23405240Snordmark 	ASSERT(connp->conn_netstack == NULL);
23415240Snordmark 
23428348SEric.Yu@Sun.COM 	ASSERT(connp->conn_helper_info == NULL);
23435240Snordmark 	/* Clear out the conn_t fields that are not preserved */
23445240Snordmark 	bzero(&connp->conn_start_clr,
23455240Snordmark 	    sizeof (conn_t) -
23465240Snordmark 	    ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp));
23470Sstevel@tonic-gate }
23480Sstevel@tonic-gate 
23490Sstevel@tonic-gate /*
23500Sstevel@tonic-gate  * All conns are inserted in a global multi-list for the benefit of
23510Sstevel@tonic-gate  * walkers. The walk is guaranteed to walk all open conns at the time
23520Sstevel@tonic-gate  * of the start of the walk exactly once. This property is needed to
23530Sstevel@tonic-gate  * achieve some cleanups during unplumb of interfaces. This is achieved
23540Sstevel@tonic-gate  * as follows.
23550Sstevel@tonic-gate  *
23560Sstevel@tonic-gate  * ipcl_conn_create and ipcl_conn_destroy are the only functions that
23570Sstevel@tonic-gate  * call the insert and delete functions below at creation and deletion
23580Sstevel@tonic-gate  * time respectively. The conn never moves or changes its position in this
23590Sstevel@tonic-gate  * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt
23600Sstevel@tonic-gate  * won't increase due to walkers, once the conn deletion has started. Note
23610Sstevel@tonic-gate  * that we can't remove the conn from the global list and then wait for
23620Sstevel@tonic-gate  * the refcnt to drop to zero, since walkers would then see a truncated
23630Sstevel@tonic-gate  * list. CONN_INCIPIENT ensures that walkers don't start looking at
23640Sstevel@tonic-gate  * conns until ip_open is ready to make them globally visible.
23650Sstevel@tonic-gate  * The global round robin multi-list locks are held only to get the
23660Sstevel@tonic-gate  * next member/insertion/deletion and contention should be negligible
23670Sstevel@tonic-gate  * if the multi-list is much greater than the number of cpus.
23680Sstevel@tonic-gate  */
23690Sstevel@tonic-gate void
23700Sstevel@tonic-gate ipcl_globalhash_insert(conn_t *connp)
23710Sstevel@tonic-gate {
23720Sstevel@tonic-gate 	int	index;
23733448Sdh155122 	struct connf_s	*connfp;
23743448Sdh155122 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
23750Sstevel@tonic-gate 
23760Sstevel@tonic-gate 	/*
23770Sstevel@tonic-gate 	 * No need for atomic here. Approximate even distribution
23780Sstevel@tonic-gate 	 * in the global lists is sufficient.
23790Sstevel@tonic-gate 	 */
23803448Sdh155122 	ipst->ips_conn_g_index++;
23813448Sdh155122 	index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1);
23820Sstevel@tonic-gate 
23830Sstevel@tonic-gate 	connp->conn_g_prev = NULL;
23840Sstevel@tonic-gate 	/*
23850Sstevel@tonic-gate 	 * Mark as INCIPIENT, so that walkers will ignore this
23860Sstevel@tonic-gate 	 * for now, till ip_open is ready to make it visible globally.
23870Sstevel@tonic-gate 	 */
23880Sstevel@tonic-gate 	connp->conn_state_flags |= CONN_INCIPIENT;
23890Sstevel@tonic-gate 
23903448Sdh155122 	connfp = &ipst->ips_ipcl_globalhash_fanout[index];
23910Sstevel@tonic-gate 	/* Insert at the head of the list */
23923448Sdh155122 	mutex_enter(&connfp->connf_lock);
23933448Sdh155122 	connp->conn_g_next = connfp->connf_head;
23940Sstevel@tonic-gate 	if (connp->conn_g_next != NULL)
23950Sstevel@tonic-gate 		connp->conn_g_next->conn_g_prev = connp;
23963448Sdh155122 	connfp->connf_head = connp;
23970Sstevel@tonic-gate 
23980Sstevel@tonic-gate 	/* The fanout bucket this conn points to */
23993448Sdh155122 	connp->conn_g_fanout = connfp;
24000Sstevel@tonic-gate 
24013448Sdh155122 	mutex_exit(&connfp->connf_lock);
24020Sstevel@tonic-gate }
24030Sstevel@tonic-gate 
24040Sstevel@tonic-gate void
24050Sstevel@tonic-gate ipcl_globalhash_remove(conn_t *connp)
24060Sstevel@tonic-gate {
24073448Sdh155122 	struct connf_s	*connfp;
24083448Sdh155122 
24090Sstevel@tonic-gate 	/*
24100Sstevel@tonic-gate 	 * We were never inserted in the global multi list.
24110Sstevel@tonic-gate 	 * IPCL_NONE variety is never inserted in the global multilist
24120Sstevel@tonic-gate 	 * since it is presumed to not need any cleanup and is transient.
24130Sstevel@tonic-gate 	 */
24140Sstevel@tonic-gate 	if (connp->conn_g_fanout == NULL)
24150Sstevel@tonic-gate 		return;
24160Sstevel@tonic-gate 
24173448Sdh155122 	connfp = connp->conn_g_fanout;
24183448Sdh155122 	mutex_enter(&connfp->connf_lock);
24190Sstevel@tonic-gate 	if (connp->conn_g_prev != NULL)
24200Sstevel@tonic-gate 		connp->conn_g_prev->conn_g_next = connp->conn_g_next;
24210Sstevel@tonic-gate 	else
24223448Sdh155122 		connfp->connf_head = connp->conn_g_next;
24230Sstevel@tonic-gate 	if (connp->conn_g_next != NULL)
24240Sstevel@tonic-gate 		connp->conn_g_next->conn_g_prev = connp->conn_g_prev;
24253448Sdh155122 	mutex_exit(&connfp->connf_lock);
24260Sstevel@tonic-gate 
24270Sstevel@tonic-gate 	/* Better to stumble on a null pointer than to corrupt memory */
24280Sstevel@tonic-gate 	connp->conn_g_next = NULL;
24290Sstevel@tonic-gate 	connp->conn_g_prev = NULL;
24305240Snordmark 	connp->conn_g_fanout = NULL;
24310Sstevel@tonic-gate }
24320Sstevel@tonic-gate 
24330Sstevel@tonic-gate /*
24340Sstevel@tonic-gate  * Walk the list of all conn_t's in the system, calling the function provided
24350Sstevel@tonic-gate  * with the specified argument for each.
24360Sstevel@tonic-gate  * Applies to both IPv4 and IPv6.
24370Sstevel@tonic-gate  *
24380Sstevel@tonic-gate  * IPCs may hold pointers to ipif/ill. To guard against stale pointers
24390Sstevel@tonic-gate  * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is
24400Sstevel@tonic-gate  * unplumbed or removed. New conn_t's that are created while we are walking
24410Sstevel@tonic-gate  * may be missed by this walk, because they are not necessarily inserted
24420Sstevel@tonic-gate  * at the tail of the list. They are new conn_t's and thus don't have any
24430Sstevel@tonic-gate  * stale pointers. The CONN_CLOSING flag ensures that no new reference
24440Sstevel@tonic-gate  * is created to the struct that is going away.
24450Sstevel@tonic-gate  */
24460Sstevel@tonic-gate void
24473448Sdh155122 ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst)
24480Sstevel@tonic-gate {
24490Sstevel@tonic-gate 	int	i;
24500Sstevel@tonic-gate 	conn_t	*connp;
24510Sstevel@tonic-gate 	conn_t	*prev_connp;
24520Sstevel@tonic-gate 
24530Sstevel@tonic-gate 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
24543448Sdh155122 		mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
24550Sstevel@tonic-gate 		prev_connp = NULL;
24563448Sdh155122 		connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head;
24570Sstevel@tonic-gate 		while (connp != NULL) {
24580Sstevel@tonic-gate 			mutex_enter(&connp->conn_lock);
24590Sstevel@tonic-gate 			if (connp->conn_state_flags &
24600Sstevel@tonic-gate 			    (CONN_CONDEMNED | CONN_INCIPIENT)) {
24610Sstevel@tonic-gate 				mutex_exit(&connp->conn_lock);
24620Sstevel@tonic-gate 				connp = connp->conn_g_next;
24630Sstevel@tonic-gate 				continue;
24640Sstevel@tonic-gate 			}
24650Sstevel@tonic-gate 			CONN_INC_REF_LOCKED(connp);
24660Sstevel@tonic-gate 			mutex_exit(&connp->conn_lock);
24673448Sdh155122 			mutex_exit(
24683448Sdh155122 			    &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
24690Sstevel@tonic-gate 			(*func)(connp, arg);
24700Sstevel@tonic-gate 			if (prev_connp != NULL)
24710Sstevel@tonic-gate 				CONN_DEC_REF(prev_connp);
24723448Sdh155122 			mutex_enter(
24733448Sdh155122 			    &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
24740Sstevel@tonic-gate 			prev_connp = connp;
24750Sstevel@tonic-gate 			connp = connp->conn_g_next;
24760Sstevel@tonic-gate 		}
24773448Sdh155122 		mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
24780Sstevel@tonic-gate 		if (prev_connp != NULL)
24790Sstevel@tonic-gate 			CONN_DEC_REF(prev_connp);
24800Sstevel@tonic-gate 	}
24810Sstevel@tonic-gate }
24820Sstevel@tonic-gate 
24830Sstevel@tonic-gate /*
24840Sstevel@tonic-gate  * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on
24850Sstevel@tonic-gate  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
24860Sstevel@tonic-gate  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
24872323Sethindra  * (peer tcp in ESTABLISHED state).
24880Sstevel@tonic-gate  */
24890Sstevel@tonic-gate conn_t *
24903448Sdh155122 ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcph_t *tcph,
24913448Sdh155122     ip_stack_t *ipst)
24920Sstevel@tonic-gate {
24930Sstevel@tonic-gate 	uint32_t ports;
24940Sstevel@tonic-gate 	uint16_t *pports = (uint16_t *)&ports;
24950Sstevel@tonic-gate 	connf_t	*connfp;
24960Sstevel@tonic-gate 	conn_t	*tconnp;
24970Sstevel@tonic-gate 	boolean_t zone_chk;
24980Sstevel@tonic-gate 
24990Sstevel@tonic-gate 	/*
25000Sstevel@tonic-gate 	 * If either the source of destination address is loopback, then
25010Sstevel@tonic-gate 	 * both endpoints must be in the same Zone.  Otherwise, both of
25020Sstevel@tonic-gate 	 * the addresses are system-wide unique (tcp is in ESTABLISHED
25030Sstevel@tonic-gate 	 * state) and the endpoints may reside in different Zones.
25040Sstevel@tonic-gate 	 */
25050Sstevel@tonic-gate 	zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) ||
25060Sstevel@tonic-gate 	    ipha->ipha_dst == htonl(INADDR_LOOPBACK));
25070Sstevel@tonic-gate 
25080Sstevel@tonic-gate 	bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t));
25090Sstevel@tonic-gate 	bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t));
25100Sstevel@tonic-gate 
25113448Sdh155122 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
25123448Sdh155122 	    ports, ipst)];
25130Sstevel@tonic-gate 
25140Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
25150Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
25160Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
25170Sstevel@tonic-gate 
25180Sstevel@tonic-gate 		if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
25190Sstevel@tonic-gate 		    ipha->ipha_dst, ipha->ipha_src, ports) &&
25202323Sethindra 		    tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
25210Sstevel@tonic-gate 		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
25220Sstevel@tonic-gate 
25230Sstevel@tonic-gate 			ASSERT(tconnp != connp);
25240Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
25250Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
25260Sstevel@tonic-gate 			return (tconnp);
25270Sstevel@tonic-gate 		}
25280Sstevel@tonic-gate 	}
25290Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
25300Sstevel@tonic-gate 	return (NULL);
25310Sstevel@tonic-gate }
25320Sstevel@tonic-gate 
25330Sstevel@tonic-gate /*
25340Sstevel@tonic-gate  * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on
25350Sstevel@tonic-gate  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
25360Sstevel@tonic-gate  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
25372323Sethindra  * (peer tcp in ESTABLISHED state).
25380Sstevel@tonic-gate  */
25390Sstevel@tonic-gate conn_t *
25403448Sdh155122 ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcph_t *tcph,
25413448Sdh155122     ip_stack_t *ipst)
25420Sstevel@tonic-gate {
25430Sstevel@tonic-gate 	uint32_t ports;
25440Sstevel@tonic-gate 	uint16_t *pports = (uint16_t *)&ports;
25450Sstevel@tonic-gate 	connf_t	*connfp;
25460Sstevel@tonic-gate 	conn_t	*tconnp;
25470Sstevel@tonic-gate 	boolean_t zone_chk;
25480Sstevel@tonic-gate 
25490Sstevel@tonic-gate 	/*
25500Sstevel@tonic-gate 	 * If either the source of destination address is loopback, then
25510Sstevel@tonic-gate 	 * both endpoints must be in the same Zone.  Otherwise, both of
25520Sstevel@tonic-gate 	 * the addresses are system-wide unique (tcp is in ESTABLISHED
25530Sstevel@tonic-gate 	 * state) and the endpoints may reside in different Zones.  We
25540Sstevel@tonic-gate 	 * don't do Zone check for link local address(es) because the
25550Sstevel@tonic-gate 	 * current Zone implementation treats each link local address as
25560Sstevel@tonic-gate 	 * being unique per system node, i.e. they belong to global Zone.
25570Sstevel@tonic-gate 	 */
25580Sstevel@tonic-gate 	zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) ||
25590Sstevel@tonic-gate 	    IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst));
25600Sstevel@tonic-gate 
25610Sstevel@tonic-gate 	bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t));
25620Sstevel@tonic-gate 	bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t));
25630Sstevel@tonic-gate 
25643448Sdh155122 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
25653448Sdh155122 	    ports, ipst)];
25660Sstevel@tonic-gate 
25670Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
25680Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
25690Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
25700Sstevel@tonic-gate 
25710Sstevel@tonic-gate 		/* We skip tcp_bound_if check here as this is loopback tcp */
25720Sstevel@tonic-gate 		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
25730Sstevel@tonic-gate 		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
25742323Sethindra 		    tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
25750Sstevel@tonic-gate 		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
25760Sstevel@tonic-gate 
25770Sstevel@tonic-gate 			ASSERT(tconnp != connp);
25780Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
25790Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
25800Sstevel@tonic-gate 			return (tconnp);
25810Sstevel@tonic-gate 		}
25820Sstevel@tonic-gate 	}
25830Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
25840Sstevel@tonic-gate 	return (NULL);
25850Sstevel@tonic-gate }
25860Sstevel@tonic-gate 
25870Sstevel@tonic-gate /*
25880Sstevel@tonic-gate  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
25890Sstevel@tonic-gate  * Returns with conn reference held. Caller must call CONN_DEC_REF.
25900Sstevel@tonic-gate  * Only checks for connected entries i.e. no INADDR_ANY checks.
25910Sstevel@tonic-gate  */
25920Sstevel@tonic-gate conn_t *
25933448Sdh155122 ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcph_t *tcph, int min_state,
25943448Sdh155122     ip_stack_t *ipst)
25950Sstevel@tonic-gate {
25960Sstevel@tonic-gate 	uint32_t ports;
25970Sstevel@tonic-gate 	uint16_t *pports;
25980Sstevel@tonic-gate 	connf_t	*connfp;
25990Sstevel@tonic-gate 	conn_t	*tconnp;
26000Sstevel@tonic-gate 
26010Sstevel@tonic-gate 	pports = (uint16_t *)&ports;
26020Sstevel@tonic-gate 	bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t));
26030Sstevel@tonic-gate 	bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t));
26040Sstevel@tonic-gate 
26053448Sdh155122 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
26064691Skcpoon 	    ports, ipst)];
26070Sstevel@tonic-gate 
26080Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
26090Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
26100Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
26110Sstevel@tonic-gate 
26120Sstevel@tonic-gate 		if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
26130Sstevel@tonic-gate 		    ipha->ipha_dst, ipha->ipha_src, ports) &&
26140Sstevel@tonic-gate 		    tconnp->conn_tcp->tcp_state >= min_state) {
26150Sstevel@tonic-gate 
26160Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
26170Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
26180Sstevel@tonic-gate 			return (tconnp);
26190Sstevel@tonic-gate 		}
26200Sstevel@tonic-gate 	}
26210Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
26220Sstevel@tonic-gate 	return (NULL);
26230Sstevel@tonic-gate }
26240Sstevel@tonic-gate 
26250Sstevel@tonic-gate /*
26260Sstevel@tonic-gate  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
26270Sstevel@tonic-gate  * Returns with conn reference held. Caller must call CONN_DEC_REF.
26280Sstevel@tonic-gate  * Only checks for connected entries i.e. no INADDR_ANY checks.
26290Sstevel@tonic-gate  * Match on ifindex in addition to addresses.
26300Sstevel@tonic-gate  */
26310Sstevel@tonic-gate conn_t *
26320Sstevel@tonic-gate ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state,
26333448Sdh155122     uint_t ifindex, ip_stack_t *ipst)
26340Sstevel@tonic-gate {
26350Sstevel@tonic-gate 	tcp_t	*tcp;
26360Sstevel@tonic-gate 	uint32_t ports;
26370Sstevel@tonic-gate 	uint16_t *pports;
26380Sstevel@tonic-gate 	connf_t	*connfp;
26390Sstevel@tonic-gate 	conn_t	*tconnp;
26400Sstevel@tonic-gate 
26410Sstevel@tonic-gate 	pports = (uint16_t *)&ports;
26420Sstevel@tonic-gate 	pports[0] = tcpha->tha_fport;
26430Sstevel@tonic-gate 	pports[1] = tcpha->tha_lport;
26440Sstevel@tonic-gate 
26453448Sdh155122 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
26464691Skcpoon 	    ports, ipst)];
26470Sstevel@tonic-gate 
26480Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
26490Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
26500Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
26510Sstevel@tonic-gate 
26520Sstevel@tonic-gate 		tcp = tconnp->conn_tcp;
26530Sstevel@tonic-gate 		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
26540Sstevel@tonic-gate 		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
26550Sstevel@tonic-gate 		    tcp->tcp_state >= min_state &&
26560Sstevel@tonic-gate 		    (tcp->tcp_bound_if == 0 ||
26570Sstevel@tonic-gate 		    tcp->tcp_bound_if == ifindex)) {
26580Sstevel@tonic-gate 
26590Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
26600Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
26610Sstevel@tonic-gate 			return (tconnp);
26620Sstevel@tonic-gate 		}
26630Sstevel@tonic-gate 	}
26640Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
26650Sstevel@tonic-gate 	return (NULL);
26660Sstevel@tonic-gate }
26670Sstevel@tonic-gate 
26680Sstevel@tonic-gate /*
26691676Sjpk  * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate
26701676Sjpk  * a listener when changing state.
26710Sstevel@tonic-gate  */
26720Sstevel@tonic-gate conn_t *
26733448Sdh155122 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid,
26743448Sdh155122     ip_stack_t *ipst)
26750Sstevel@tonic-gate {
26760Sstevel@tonic-gate 	connf_t		*bind_connfp;
26770Sstevel@tonic-gate 	conn_t		*connp;
26780Sstevel@tonic-gate 	tcp_t		*tcp;
26790Sstevel@tonic-gate 
26800Sstevel@tonic-gate 	/*
26810Sstevel@tonic-gate 	 * Avoid false matches for packets sent to an IP destination of
26820Sstevel@tonic-gate 	 * all zeros.
26830Sstevel@tonic-gate 	 */
26840Sstevel@tonic-gate 	if (laddr == 0)
26850Sstevel@tonic-gate 		return (NULL);
26860Sstevel@tonic-gate 
26871676Sjpk 	ASSERT(zoneid != ALL_ZONES);
26881676Sjpk 
26893448Sdh155122 	bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
26900Sstevel@tonic-gate 	mutex_enter(&bind_connfp->connf_lock);
26910Sstevel@tonic-gate 	for (connp = bind_connfp->connf_head; connp != NULL;
26920Sstevel@tonic-gate 	    connp = connp->conn_next) {
26930Sstevel@tonic-gate 		tcp = connp->conn_tcp;
26940Sstevel@tonic-gate 		if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) &&
26952263Ssommerfe 		    IPCL_ZONE_MATCH(connp, zoneid) &&
26960Sstevel@tonic-gate 		    (tcp->tcp_listener == NULL)) {
26970Sstevel@tonic-gate 			CONN_INC_REF(connp);
26980Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
26990Sstevel@tonic-gate 			return (connp);
27000Sstevel@tonic-gate 		}
27010Sstevel@tonic-gate 	}
27020Sstevel@tonic-gate 	mutex_exit(&bind_connfp->connf_lock);
27030Sstevel@tonic-gate 	return (NULL);
27040Sstevel@tonic-gate }
27050Sstevel@tonic-gate 
27061676Sjpk /*
27071676Sjpk  * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate
27081676Sjpk  * a listener when changing state.
27091676Sjpk  */
27100Sstevel@tonic-gate conn_t *
27110Sstevel@tonic-gate ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex,
27123448Sdh155122     zoneid_t zoneid, ip_stack_t *ipst)
27130Sstevel@tonic-gate {
27140Sstevel@tonic-gate 	connf_t		*bind_connfp;
27150Sstevel@tonic-gate 	conn_t		*connp = NULL;
27160Sstevel@tonic-gate 	tcp_t		*tcp;
27170Sstevel@tonic-gate 
27180Sstevel@tonic-gate 	/*
27190Sstevel@tonic-gate 	 * Avoid false matches for packets sent to an IP destination of
27200Sstevel@tonic-gate 	 * all zeros.
27210Sstevel@tonic-gate 	 */
27220Sstevel@tonic-gate 	if (IN6_IS_ADDR_UNSPECIFIED(laddr))
27230Sstevel@tonic-gate 		return (NULL);
27240Sstevel@tonic-gate 
27251676Sjpk 	ASSERT(zoneid != ALL_ZONES);
27260Sstevel@tonic-gate 
27273448Sdh155122 	bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
27280Sstevel@tonic-gate 	mutex_enter(&bind_connfp->connf_lock);
27290Sstevel@tonic-gate 	for (connp = bind_connfp->connf_head; connp != NULL;
27300Sstevel@tonic-gate 	    connp = connp->conn_next) {
27310Sstevel@tonic-gate 		tcp = connp->conn_tcp;
27320Sstevel@tonic-gate 		if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) &&
27332263Ssommerfe 		    IPCL_ZONE_MATCH(connp, zoneid) &&
27340Sstevel@tonic-gate 		    (tcp->tcp_bound_if == 0 ||
27350Sstevel@tonic-gate 		    tcp->tcp_bound_if == ifindex) &&
27360Sstevel@tonic-gate 		    tcp->tcp_listener == NULL) {
27370Sstevel@tonic-gate 			CONN_INC_REF(connp);
27380Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
27390Sstevel@tonic-gate 			return (connp);
27400Sstevel@tonic-gate 		}
27410Sstevel@tonic-gate 	}
27420Sstevel@tonic-gate 	mutex_exit(&bind_connfp->connf_lock);
27430Sstevel@tonic-gate 	return (NULL);
27440Sstevel@tonic-gate }
27450Sstevel@tonic-gate 
2746741Smasputra /*
2747741Smasputra  * ipcl_get_next_conn
2748741Smasputra  *	get the next entry in the conn global list
2749741Smasputra  *	and put a reference on the next_conn.
2750741Smasputra  *	decrement the reference on the current conn.
2751741Smasputra  *
2752741Smasputra  * This is an iterator based walker function that also provides for
2753741Smasputra  * some selection by the caller. It walks through the conn_hash bucket
2754741Smasputra  * searching for the next valid connp in the list, and selects connections
2755741Smasputra  * that are neither closed nor condemned. It also REFHOLDS the conn
2756741Smasputra  * thus ensuring that the conn exists when the caller uses the conn.
2757741Smasputra  */
2758741Smasputra conn_t *
2759741Smasputra ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags)
2760741Smasputra {
2761741Smasputra 	conn_t	*next_connp;
2762741Smasputra 
2763741Smasputra 	if (connfp == NULL)
2764741Smasputra 		return (NULL);
2765741Smasputra 
2766741Smasputra 	mutex_enter(&connfp->connf_lock);
2767741Smasputra 
2768741Smasputra 	next_connp = (connp == NULL) ?
2769741Smasputra 	    connfp->connf_head : connp->conn_g_next;
2770741Smasputra 
2771741Smasputra 	while (next_connp != NULL) {
2772741Smasputra 		mutex_enter(&next_connp->conn_lock);
2773741Smasputra 		if (!(next_connp->conn_flags & conn_flags) ||
2774741Smasputra 		    (next_connp->conn_state_flags &
2775741Smasputra 		    (CONN_CONDEMNED | CONN_INCIPIENT))) {
2776741Smasputra 			/*
2777741Smasputra 			 * This conn has been condemned or
2778741Smasputra 			 * is closing, or the flags don't match
2779741Smasputra 			 */
2780741Smasputra 			mutex_exit(&next_connp->conn_lock);
2781741Smasputra 			next_connp = next_connp->conn_g_next;
2782741Smasputra 			continue;
2783741Smasputra 		}
2784741Smasputra 		CONN_INC_REF_LOCKED(next_connp);
2785741Smasputra 		mutex_exit(&next_connp->conn_lock);
2786741Smasputra 		break;
2787741Smasputra 	}
2788741Smasputra 
2789741Smasputra 	mutex_exit(&connfp->connf_lock);
2790741Smasputra 
2791741Smasputra 	if (connp != NULL)
2792741Smasputra 		CONN_DEC_REF(connp);
2793741Smasputra 
2794741Smasputra 	return (next_connp);
2795741Smasputra }
2796741Smasputra 
27970Sstevel@tonic-gate #ifdef CONN_DEBUG
27980Sstevel@tonic-gate /*
27990Sstevel@tonic-gate  * Trace of the last NBUF refhold/refrele
28000Sstevel@tonic-gate  */
28010Sstevel@tonic-gate int
28020Sstevel@tonic-gate conn_trace_ref(conn_t *connp)
28030Sstevel@tonic-gate {
28040Sstevel@tonic-gate 	int	last;
28050Sstevel@tonic-gate 	conn_trace_t	*ctb;
28060Sstevel@tonic-gate 
28070Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connp->conn_lock));
28080Sstevel@tonic-gate 	last = connp->conn_trace_last;
28090Sstevel@tonic-gate 	last++;
28100Sstevel@tonic-gate 	if (last == CONN_TRACE_MAX)
28110Sstevel@tonic-gate 		last = 0;
28120Sstevel@tonic-gate 
28130Sstevel@tonic-gate 	ctb = &connp->conn_trace_buf[last];
28145023Scarlsonj 	ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
28150Sstevel@tonic-gate 	connp->conn_trace_last = last;
28160Sstevel@tonic-gate 	return (1);
28170Sstevel@tonic-gate }
28180Sstevel@tonic-gate 
28190Sstevel@tonic-gate int
28200Sstevel@tonic-gate conn_untrace_ref(conn_t *connp)
28210Sstevel@tonic-gate {
28220Sstevel@tonic-gate 	int	last;
28230Sstevel@tonic-gate 	conn_trace_t	*ctb;
28240Sstevel@tonic-gate 
28250Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connp->conn_lock));
28260Sstevel@tonic-gate 	last = connp->conn_trace_last;
28270Sstevel@tonic-gate 	last++;
28280Sstevel@tonic-gate 	if (last == CONN_TRACE_MAX)
28290Sstevel@tonic-gate 		last = 0;
28300Sstevel@tonic-gate 
28310Sstevel@tonic-gate 	ctb = &connp->conn_trace_buf[last];
28325023Scarlsonj 	ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
28330Sstevel@tonic-gate 	connp->conn_trace_last = last;
28340Sstevel@tonic-gate 	return (1);
28350Sstevel@tonic-gate }
28360Sstevel@tonic-gate #endif
2837