xref: /onnv-gate/usr/src/uts/common/inet/ip/ipclassifier.c (revision 1676:37f4a3e2bd99)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51503Sericheng  * Common Development and Distribution License (the "License").
61503Sericheng  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
221503Sericheng  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
270Sstevel@tonic-gate 
28*1676Sjpk const char ipclassifier_version[] = "@(#)ipclassifier.c	%I%	%E% SMI";
290Sstevel@tonic-gate 
300Sstevel@tonic-gate /*
310Sstevel@tonic-gate  * IP PACKET CLASSIFIER
320Sstevel@tonic-gate  *
330Sstevel@tonic-gate  * The IP packet classifier provides mapping between IP packets and persistent
340Sstevel@tonic-gate  * connection state for connection-oriented protocols. It also provides
350Sstevel@tonic-gate  * interface for managing connection states.
360Sstevel@tonic-gate  *
370Sstevel@tonic-gate  * The connection state is kept in conn_t data structure and contains, among
380Sstevel@tonic-gate  * other things:
390Sstevel@tonic-gate  *
400Sstevel@tonic-gate  *	o local/remote address and ports
410Sstevel@tonic-gate  *	o Transport protocol
420Sstevel@tonic-gate  *	o squeue for the connection (for TCP only)
430Sstevel@tonic-gate  *	o reference counter
440Sstevel@tonic-gate  *	o Connection state
450Sstevel@tonic-gate  *	o hash table linkage
460Sstevel@tonic-gate  *	o interface/ire information
470Sstevel@tonic-gate  *	o credentials
480Sstevel@tonic-gate  *	o ipsec policy
490Sstevel@tonic-gate  *	o send and receive functions.
500Sstevel@tonic-gate  *	o mutex lock.
510Sstevel@tonic-gate  *
520Sstevel@tonic-gate  * Connections use a reference counting scheme. They are freed when the
530Sstevel@tonic-gate  * reference counter drops to zero. A reference is incremented when connection
540Sstevel@tonic-gate  * is placed in a list or table, when incoming packet for the connection arrives
550Sstevel@tonic-gate  * and when connection is processed via squeue (squeue processing may be
560Sstevel@tonic-gate  * asynchronous and the reference protects the connection from being destroyed
570Sstevel@tonic-gate  * before its processing is finished).
580Sstevel@tonic-gate  *
590Sstevel@tonic-gate  * send and receive functions are currently used for TCP only. The send function
600Sstevel@tonic-gate  * determines the IP entry point for the packet once it leaves TCP to be sent to
610Sstevel@tonic-gate  * the destination address. The receive function is used by IP when the packet
620Sstevel@tonic-gate  * should be passed for TCP processing. When a new connection is created these
630Sstevel@tonic-gate  * are set to ip_output() and tcp_input() respectively. During the lifetime of
640Sstevel@tonic-gate  * the connection the send and receive functions may change depending on the
 * changes in the connection state. For example, once the connection is bound to
 * an address, the receive function for this connection is set to
670Sstevel@tonic-gate  * tcp_conn_request().  This allows incoming SYNs to go directly into the
680Sstevel@tonic-gate  * listener SYN processing function without going to tcp_input() first.
690Sstevel@tonic-gate  *
700Sstevel@tonic-gate  * Classifier uses several hash tables:
710Sstevel@tonic-gate  *
720Sstevel@tonic-gate  * 	ipcl_conn_fanout:	contains all TCP connections in CONNECTED state
730Sstevel@tonic-gate  *	ipcl_bind_fanout:	contains all connections in BOUND state
740Sstevel@tonic-gate  *	ipcl_proto_fanout:	IPv4 protocol fanout
750Sstevel@tonic-gate  *	ipcl_proto_fanout_v6:	IPv6 protocol fanout
760Sstevel@tonic-gate  *	ipcl_udp_fanout:	contains all UDP connections
770Sstevel@tonic-gate  *	ipcl_globalhash_fanout:	contains all connections
780Sstevel@tonic-gate  *
790Sstevel@tonic-gate  * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
800Sstevel@tonic-gate  * which need to view all existing connections.
810Sstevel@tonic-gate  *
820Sstevel@tonic-gate  * All tables are protected by per-bucket locks. When both per-bucket lock and
830Sstevel@tonic-gate  * connection lock need to be held, the per-bucket lock should be acquired
840Sstevel@tonic-gate  * first, followed by the connection lock.
850Sstevel@tonic-gate  *
860Sstevel@tonic-gate  * All functions doing search in one of these tables increment a reference
870Sstevel@tonic-gate  * counter on the connection found (if any). This reference should be dropped
880Sstevel@tonic-gate  * when the caller has finished processing the connection.
890Sstevel@tonic-gate  *
900Sstevel@tonic-gate  *
910Sstevel@tonic-gate  * INTERFACES:
920Sstevel@tonic-gate  * ===========
930Sstevel@tonic-gate  *
940Sstevel@tonic-gate  * Connection Lookup:
950Sstevel@tonic-gate  * ------------------
960Sstevel@tonic-gate  *
970Sstevel@tonic-gate  * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, zoneid)
980Sstevel@tonic-gate  * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, zoneid)
990Sstevel@tonic-gate  *
1000Sstevel@tonic-gate  * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
1010Sstevel@tonic-gate  * it can't find any associated connection. If the connection is found, its
1020Sstevel@tonic-gate  * reference counter is incremented.
1030Sstevel@tonic-gate  *
1040Sstevel@tonic-gate  *	mp:	mblock, containing packet header. The full header should fit
1050Sstevel@tonic-gate  *		into a single mblock. It should also contain at least full IP
1060Sstevel@tonic-gate  *		and TCP or UDP header.
1070Sstevel@tonic-gate  *
1080Sstevel@tonic-gate  *	protocol: Either IPPROTO_TCP or IPPROTO_UDP.
1090Sstevel@tonic-gate  *
1100Sstevel@tonic-gate  *	hdr_len: The size of IP header. It is used to find TCP or UDP header in
1110Sstevel@tonic-gate  *		 the packet.
1120Sstevel@tonic-gate  *
113*1676Sjpk  * 	zoneid: The zone in which the returned connection must be; the zoneid
114*1676Sjpk  *		corresponding to the ire_zoneid on the IRE located for the
115*1676Sjpk  *		packet's destination address.
1160Sstevel@tonic-gate  *
1170Sstevel@tonic-gate  *	For TCP connections, the lookup order is as follows:
1180Sstevel@tonic-gate  *		5-tuple {src, dst, protocol, local port, remote port}
1190Sstevel@tonic-gate  *			lookup in ipcl_conn_fanout table.
 *		3-tuple {dst, local port, protocol} lookup in
 *			ipcl_bind_fanout table.
1220Sstevel@tonic-gate  *
 *	For UDP connections, a 5-tuple {src, dst, protocol, local port,
 *	remote port} lookup is done on ipcl_udp_fanout. Note that
 *	these interfaces do not handle cases where a packet belongs
 *	to multiple UDP clients; that case is handled in IP itself.
1270Sstevel@tonic-gate  *
128*1676Sjpk  * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
129*1676Sjpk  * determine which actual zone gets the segment.  This is used only in a
130*1676Sjpk  * labeled environment.  The matching rules are:
131*1676Sjpk  *
132*1676Sjpk  *	- If it's not a multilevel port, then the label on the packet selects
133*1676Sjpk  *	  the zone.  Unlabeled packets are delivered to the global zone.
134*1676Sjpk  *
135*1676Sjpk  *	- If it's a multilevel port, then only the zone registered to receive
136*1676Sjpk  *	  packets on that port matches.
137*1676Sjpk  *
138*1676Sjpk  * Also, in a labeled environment, packet labels need to be checked.  For fully
139*1676Sjpk  * bound TCP connections, we can assume that the packet label was checked
140*1676Sjpk  * during connection establishment, and doesn't need to be checked on each
141*1676Sjpk  * packet.  For others, though, we need to check for strict equality or, for
142*1676Sjpk  * multilevel ports, membership in the range or set.  This part currently does
143*1676Sjpk  * a tnrh lookup on each packet, but could be optimized to use cached results
144*1676Sjpk  * if that were necessary.  (SCTP doesn't come through here, but if it did,
145*1676Sjpk  * we would apply the same rules as TCP.)
146*1676Sjpk  *
147*1676Sjpk  * An implication of the above is that fully-bound TCP sockets must always use
148*1676Sjpk  * distinct 4-tuples; they can't be discriminated by label alone.
149*1676Sjpk  *
150*1676Sjpk  * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
151*1676Sjpk  * as there's no connection set-up handshake and no shared state.
152*1676Sjpk  *
153*1676Sjpk  * Labels on looped-back packets within a single zone do not need to be
154*1676Sjpk  * checked, as all processes in the same zone have the same label.
155*1676Sjpk  *
156*1676Sjpk  * Finally, for unlabeled packets received by a labeled system, special rules
157*1676Sjpk  * apply.  We consider only the MLP if there is one.  Otherwise, we prefer a
158*1676Sjpk  * socket in the zone whose label matches the default label of the sender, if
159*1676Sjpk  * any.  In any event, the receiving socket must have SO_MAC_EXEMPT set and the
160*1676Sjpk  * receiver's label must dominate the sender's default label.
161*1676Sjpk  *
1620Sstevel@tonic-gate  * conn_t	*ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcph_t *, int);
1630Sstevel@tonic-gate  * conn_t	*ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t);
1640Sstevel@tonic-gate  *
 *	Lookup routine to find an exact match for {src, dst, local port,
 *	remote port} for TCP connections in ipcl_conn_fanout. The addresses
 *	and ports are read from the IP and TCP headers respectively.
1680Sstevel@tonic-gate  *
1690Sstevel@tonic-gate  * conn_t	*ipcl_lookup_listener_v4(lport, laddr, protocol);
1700Sstevel@tonic-gate  * conn_t	*ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex);
1710Sstevel@tonic-gate  *
1720Sstevel@tonic-gate  * 	Lookup routine to find a listener with the tuple {lport, laddr,
1730Sstevel@tonic-gate  * 	protocol} in the ipcl_bind_fanout table. For IPv6, an additional
1740Sstevel@tonic-gate  * 	parameter interface index is also compared.
1750Sstevel@tonic-gate  *
1760Sstevel@tonic-gate  * void ipcl_walk(func, arg)
1770Sstevel@tonic-gate  *
1780Sstevel@tonic-gate  * 	Apply 'func' to every connection available. The 'func' is called as
1790Sstevel@tonic-gate  *	(*func)(connp, arg). The walk is non-atomic so connections may be
1800Sstevel@tonic-gate  *	created and destroyed during the walk. The CONN_CONDEMNED and
1810Sstevel@tonic-gate  *	CONN_INCIPIENT flags ensure that connections which are newly created
1820Sstevel@tonic-gate  *	or being destroyed are not selected by the walker.
1830Sstevel@tonic-gate  *
1840Sstevel@tonic-gate  * Table Updates
1850Sstevel@tonic-gate  * -------------
1860Sstevel@tonic-gate  *
1870Sstevel@tonic-gate  * int ipcl_conn_insert(connp, protocol, src, dst, ports)
1880Sstevel@tonic-gate  * int ipcl_conn_insert_v6(connp, protocol, src, dst, ports, ifindex)
1890Sstevel@tonic-gate  *
1900Sstevel@tonic-gate  *	Insert 'connp' in the ipcl_conn_fanout.
 *	Arguments :
1920Sstevel@tonic-gate  *		connp		conn_t to be inserted
1930Sstevel@tonic-gate  *		protocol	connection protocol
1940Sstevel@tonic-gate  *		src		source address
1950Sstevel@tonic-gate  *		dst		destination address
1960Sstevel@tonic-gate  *		ports		local and remote port
1970Sstevel@tonic-gate  *		ifindex		interface index for IPv6 connections
1980Sstevel@tonic-gate  *
1990Sstevel@tonic-gate  *	Return value :
2000Sstevel@tonic-gate  *		0		if connp was inserted
2010Sstevel@tonic-gate  *		EADDRINUSE	if the connection with the same tuple
2020Sstevel@tonic-gate  *				already exists.
2030Sstevel@tonic-gate  *
2040Sstevel@tonic-gate  * int ipcl_bind_insert(connp, protocol, src, lport);
2050Sstevel@tonic-gate  * int ipcl_bind_insert_v6(connp, protocol, src, lport);
2060Sstevel@tonic-gate  *
2070Sstevel@tonic-gate  * 	Insert 'connp' in ipcl_bind_fanout.
 * 	Arguments :
2090Sstevel@tonic-gate  * 		connp		conn_t to be inserted
2100Sstevel@tonic-gate  * 		protocol	connection protocol
2110Sstevel@tonic-gate  * 		src		source address connection wants
2120Sstevel@tonic-gate  * 				to bind to
2130Sstevel@tonic-gate  * 		lport		local port connection wants to
2140Sstevel@tonic-gate  * 				bind to
2150Sstevel@tonic-gate  *
2160Sstevel@tonic-gate  *
2170Sstevel@tonic-gate  * void ipcl_hash_remove(connp);
2180Sstevel@tonic-gate  *
2190Sstevel@tonic-gate  * 	Removes the 'connp' from the connection fanout table.
2200Sstevel@tonic-gate  *
2210Sstevel@tonic-gate  * Connection Creation/Destruction
2220Sstevel@tonic-gate  * -------------------------------
2230Sstevel@tonic-gate  *
2240Sstevel@tonic-gate  * conn_t *ipcl_conn_create(type, sleep)
2250Sstevel@tonic-gate  *
2260Sstevel@tonic-gate  * 	Creates a new conn based on the type flag, inserts it into
2270Sstevel@tonic-gate  * 	globalhash table.
2280Sstevel@tonic-gate  *
 *	type:	This flag determines the type of conn_t which needs to be
 *		created.
 *		IPCL_TCPCONN	indicates a TCP connection
 *		IPCL_SCTPCONN	indicates an SCTP connection
 *		IPCL_IPCCONN	indicates all other connections.
2330Sstevel@tonic-gate  *
2340Sstevel@tonic-gate  * void ipcl_conn_destroy(connp)
2350Sstevel@tonic-gate  *
2360Sstevel@tonic-gate  * 	Destroys the connection state, removes it from the global
2370Sstevel@tonic-gate  * 	connection hash table and frees its memory.
2380Sstevel@tonic-gate  */
2390Sstevel@tonic-gate 
2400Sstevel@tonic-gate #include <sys/types.h>
2410Sstevel@tonic-gate #include <sys/stream.h>
2420Sstevel@tonic-gate #include <sys/stropts.h>
2430Sstevel@tonic-gate #include <sys/sysmacros.h>
2440Sstevel@tonic-gate #include <sys/strsubr.h>
2450Sstevel@tonic-gate #include <sys/strsun.h>
2460Sstevel@tonic-gate #define	_SUN_TPI_VERSION 2
2470Sstevel@tonic-gate #include <sys/ddi.h>
2480Sstevel@tonic-gate #include <sys/cmn_err.h>
2490Sstevel@tonic-gate #include <sys/debug.h>
2500Sstevel@tonic-gate 
2510Sstevel@tonic-gate #include <sys/systm.h>
2520Sstevel@tonic-gate #include <sys/param.h>
2530Sstevel@tonic-gate #include <sys/kmem.h>
2540Sstevel@tonic-gate #include <sys/isa_defs.h>
2550Sstevel@tonic-gate #include <inet/common.h>
2560Sstevel@tonic-gate #include <netinet/ip6.h>
2570Sstevel@tonic-gate #include <netinet/icmp6.h>
2580Sstevel@tonic-gate 
2590Sstevel@tonic-gate #include <inet/ip.h>
2600Sstevel@tonic-gate #include <inet/ip6.h>
2610Sstevel@tonic-gate #include <inet/tcp.h>
2620Sstevel@tonic-gate #include <inet/ip_ndp.h>
263741Smasputra #include <inet/udp_impl.h>
2640Sstevel@tonic-gate #include <inet/sctp_ip.h>
2650Sstevel@tonic-gate 
2660Sstevel@tonic-gate #include <sys/cpuvar.h>
2670Sstevel@tonic-gate 
2680Sstevel@tonic-gate #include <inet/ipclassifier.h>
2690Sstevel@tonic-gate #include <inet/ipsec_impl.h>
2700Sstevel@tonic-gate 
271*1676Sjpk #include <sys/tsol/tnet.h>
272*1676Sjpk 
/*
 * Classifier debug tracing is compiled in only when DEBUG is defined.
 */
#ifdef DEBUG
#define	IPCL_DEBUG
#else
#undef	IPCL_DEBUG
#endif

/*
 * IPCL_DEBUG_LVL(level, args) calls printf with the parenthesized argument
 * list 'args' when any bit of 'level' is set in ipcl_debug_level.  Without
 * IPCL_DEBUG it expands to an empty block.
 *
 * 'level' is parenthesized so that an expression such as (1 | 2) is masked
 * as a whole.  The tracing form carries its own 'else' so that an 'else'
 * written after the macro can never pair with the macro's internal 'if';
 * both forms therefore behave the same way as a statement.
 */
#ifdef	IPCL_DEBUG
int	ipcl_debug_level = 0;
#define	IPCL_DEBUG_LVL(level, args)	\
	if (ipcl_debug_level & (level)) { printf args; } else {; }
#else
#define	IPCL_DEBUG_LVL(level, args) {; }
#endif
2860Sstevel@tonic-gate connf_t	*ipcl_conn_fanout;
2870Sstevel@tonic-gate connf_t	*ipcl_bind_fanout;
2880Sstevel@tonic-gate connf_t	ipcl_proto_fanout[IPPROTO_MAX + 1];
2890Sstevel@tonic-gate connf_t	ipcl_proto_fanout_v6[IPPROTO_MAX + 1];
2900Sstevel@tonic-gate connf_t	*ipcl_udp_fanout;
2910Sstevel@tonic-gate 
2920Sstevel@tonic-gate /* A separate hash list for raw socket. */
2930Sstevel@tonic-gate connf_t *ipcl_raw_fanout;
2940Sstevel@tonic-gate 
2950Sstevel@tonic-gate connf_t rts_clients;
2960Sstevel@tonic-gate 
2970Sstevel@tonic-gate /* Old value for compatibility */
2980Sstevel@tonic-gate uint_t tcp_conn_hash_size = 0;
2990Sstevel@tonic-gate 
3000Sstevel@tonic-gate /* New value. Zero means choose automatically. */
3010Sstevel@tonic-gate uint_t ipcl_conn_hash_size = 0;
3020Sstevel@tonic-gate uint_t ipcl_conn_hash_memfactor = 8192;
3030Sstevel@tonic-gate uint_t ipcl_conn_hash_maxsize = 82500;
3040Sstevel@tonic-gate 
3050Sstevel@tonic-gate uint_t ipcl_conn_fanout_size = 0;
3060Sstevel@tonic-gate 
3070Sstevel@tonic-gate 
3080Sstevel@tonic-gate /* bind/udp fanout table size */
3090Sstevel@tonic-gate uint_t ipcl_bind_fanout_size = 512;
3101503Sericheng uint_t ipcl_udp_fanout_size = 16384;
3110Sstevel@tonic-gate 
3120Sstevel@tonic-gate /* Raw socket fanout size.  Must be a power of 2. */
3130Sstevel@tonic-gate uint_t ipcl_raw_fanout_size = 256;
3140Sstevel@tonic-gate 
/*
 * Primes useful for sizing hash tables.  Entry N, for N of 0-28, is the
 * nearest prime <= 2^N - 2^(N-2); entries 0-2 are 0, and a final 0 entry
 * terminates the list.
 */

#define	P2Ps() {							\
	0, 0, 0, 5, 11, 23, 47, 89,			/* N = 0-7 */	\
	191, 383, 761, 1531, 3067, 6143, 12281, 24571,	/* N = 8-15 */	\
	49139, 98299, 196597, 393209,			/* N = 16-19 */	\
	786431, 1572853, 3145721, 6291449,		/* N = 20-23 */	\
	12582893, 25165813, 50331599, 100663291,	/* N = 24-27 */	\
	201326557,					/* N = 28 */	\
	0}
3240Sstevel@tonic-gate 
3250Sstevel@tonic-gate /*
3260Sstevel@tonic-gate  * wrapper structure to ensure that conn+tcpb are aligned
3270Sstevel@tonic-gate  * on cache lines.
3280Sstevel@tonic-gate  */
3290Sstevel@tonic-gate typedef struct itc_s {
3300Sstevel@tonic-gate 	union {
3310Sstevel@tonic-gate 		conn_t	itcu_conn;
3320Sstevel@tonic-gate 		char	itcu_filler[CACHE_ALIGN(conn_s)];
3330Sstevel@tonic-gate 	}	itc_u;
3340Sstevel@tonic-gate 	tcp_t	itc_tcp;
3350Sstevel@tonic-gate } itc_t;
3360Sstevel@tonic-gate 
3370Sstevel@tonic-gate #define	itc_conn	itc_u.itcu_conn
3380Sstevel@tonic-gate 
3390Sstevel@tonic-gate struct kmem_cache  *ipcl_tcpconn_cache;
3400Sstevel@tonic-gate struct kmem_cache  *ipcl_tcp_cache;
3410Sstevel@tonic-gate struct kmem_cache  *ipcl_conn_cache;
3420Sstevel@tonic-gate extern struct kmem_cache  *sctp_conn_cache;
3430Sstevel@tonic-gate extern struct kmem_cache  *tcp_sack_info_cache;
3440Sstevel@tonic-gate extern struct kmem_cache  *tcp_iphc_cache;
3450Sstevel@tonic-gate 
3460Sstevel@tonic-gate extern void	tcp_timermp_free(tcp_t *);
3470Sstevel@tonic-gate extern mblk_t	*tcp_timermp_alloc(int);
3480Sstevel@tonic-gate 
3490Sstevel@tonic-gate static int	ipcl_tcpconn_constructor(void *, void *, int);
3500Sstevel@tonic-gate static void	ipcl_tcpconn_destructor(void *, void *);
3510Sstevel@tonic-gate 
3520Sstevel@tonic-gate static int conn_g_index;
3530Sstevel@tonic-gate connf_t	*ipcl_globalhash_fanout;
3540Sstevel@tonic-gate 
3550Sstevel@tonic-gate #ifdef	IPCL_DEBUG
3560Sstevel@tonic-gate #define	INET_NTOA_BUFSIZE	18
3570Sstevel@tonic-gate 
/*
 * Format the four bytes of 'in', taken in memory order, as a dotted-decimal
 * string in the caller-supplied buffer 'b', and return 'b'.
 */
static char *
inet_ntoa_r(uint32_t in, char *b)
{
	const unsigned char	*octet = (const unsigned char *)&in;

	(void) sprintf(b, "%d.%d.%d.%d",
	    octet[0], octet[1], octet[2], octet[3]);
	return (b);
}
3670Sstevel@tonic-gate #endif
3680Sstevel@tonic-gate 
3690Sstevel@tonic-gate /*
3700Sstevel@tonic-gate  * ipclassifier intialization routine, sets up hash tables and
3710Sstevel@tonic-gate  * conn caches.
3720Sstevel@tonic-gate  */
3730Sstevel@tonic-gate void
3740Sstevel@tonic-gate ipcl_init(void)
3750Sstevel@tonic-gate {
3760Sstevel@tonic-gate 	int i;
3770Sstevel@tonic-gate 	int sizes[] = P2Ps();
3780Sstevel@tonic-gate 
3790Sstevel@tonic-gate 	ipcl_conn_cache = kmem_cache_create("ipcl_conn_cache",
3800Sstevel@tonic-gate 	    sizeof (conn_t), CACHE_ALIGN_SIZE,
381741Smasputra 	    NULL, NULL, NULL, NULL, NULL, 0);
3820Sstevel@tonic-gate 
3830Sstevel@tonic-gate 	ipcl_tcpconn_cache = kmem_cache_create("ipcl_tcpconn_cache",
3840Sstevel@tonic-gate 	    sizeof (itc_t), CACHE_ALIGN_SIZE,
3850Sstevel@tonic-gate 	    ipcl_tcpconn_constructor, ipcl_tcpconn_destructor,
3860Sstevel@tonic-gate 	    NULL, NULL, NULL, 0);
3870Sstevel@tonic-gate 
3880Sstevel@tonic-gate 	/*
3890Sstevel@tonic-gate 	 * Calculate size of conn fanout table.
3900Sstevel@tonic-gate 	 */
3910Sstevel@tonic-gate 	if (ipcl_conn_hash_size != 0) {
3920Sstevel@tonic-gate 		ipcl_conn_fanout_size = ipcl_conn_hash_size;
3930Sstevel@tonic-gate 	} else if (tcp_conn_hash_size != 0) {
3940Sstevel@tonic-gate 		ipcl_conn_fanout_size = tcp_conn_hash_size;
3950Sstevel@tonic-gate 	} else {
3960Sstevel@tonic-gate 		extern pgcnt_t freemem;
3970Sstevel@tonic-gate 
3980Sstevel@tonic-gate 		ipcl_conn_fanout_size =
3990Sstevel@tonic-gate 		    (freemem * PAGESIZE) / ipcl_conn_hash_memfactor;
4000Sstevel@tonic-gate 
4010Sstevel@tonic-gate 		if (ipcl_conn_fanout_size > ipcl_conn_hash_maxsize)
4020Sstevel@tonic-gate 			ipcl_conn_fanout_size = ipcl_conn_hash_maxsize;
4030Sstevel@tonic-gate 	}
4040Sstevel@tonic-gate 
4050Sstevel@tonic-gate 	for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) {
4060Sstevel@tonic-gate 		if (sizes[i] >= ipcl_conn_fanout_size) {
4070Sstevel@tonic-gate 			break;
4080Sstevel@tonic-gate 		}
4090Sstevel@tonic-gate 	}
4100Sstevel@tonic-gate 	if ((ipcl_conn_fanout_size = sizes[i]) == 0) {
4110Sstevel@tonic-gate 		/* Out of range, use the 2^16 value */
4120Sstevel@tonic-gate 		ipcl_conn_fanout_size = sizes[16];
4130Sstevel@tonic-gate 	}
4140Sstevel@tonic-gate 	ipcl_conn_fanout = (connf_t *)kmem_zalloc(ipcl_conn_fanout_size *
4150Sstevel@tonic-gate 	    sizeof (*ipcl_conn_fanout), KM_SLEEP);
4160Sstevel@tonic-gate 
4170Sstevel@tonic-gate 	for (i = 0; i < ipcl_conn_fanout_size; i++) {
4180Sstevel@tonic-gate 		mutex_init(&ipcl_conn_fanout[i].connf_lock, NULL,
4190Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4200Sstevel@tonic-gate 	}
4210Sstevel@tonic-gate 
4220Sstevel@tonic-gate 	ipcl_bind_fanout = (connf_t *)kmem_zalloc(ipcl_bind_fanout_size *
4230Sstevel@tonic-gate 	    sizeof (*ipcl_bind_fanout), KM_SLEEP);
4240Sstevel@tonic-gate 
4250Sstevel@tonic-gate 	for (i = 0; i < ipcl_bind_fanout_size; i++) {
4260Sstevel@tonic-gate 		mutex_init(&ipcl_bind_fanout[i].connf_lock, NULL,
4270Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4280Sstevel@tonic-gate 	}
4290Sstevel@tonic-gate 
4300Sstevel@tonic-gate 	for (i = 0; i < A_CNT(ipcl_proto_fanout); i++) {
4310Sstevel@tonic-gate 		mutex_init(&ipcl_proto_fanout[i].connf_lock, NULL,
4320Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4330Sstevel@tonic-gate 	}
4340Sstevel@tonic-gate 	for (i = 0; i < A_CNT(ipcl_proto_fanout_v6); i++) {
4350Sstevel@tonic-gate 		mutex_init(&ipcl_proto_fanout_v6[i].connf_lock, NULL,
4360Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4370Sstevel@tonic-gate 	}
4380Sstevel@tonic-gate 
4390Sstevel@tonic-gate 	mutex_init(&rts_clients.connf_lock, NULL, MUTEX_DEFAULT, NULL);
4400Sstevel@tonic-gate 
4410Sstevel@tonic-gate 	ipcl_udp_fanout = (connf_t *)kmem_zalloc(ipcl_udp_fanout_size *
4420Sstevel@tonic-gate 	    sizeof (*ipcl_udp_fanout), KM_SLEEP);
4430Sstevel@tonic-gate 
4440Sstevel@tonic-gate 	for (i = 0; i < ipcl_udp_fanout_size; i++) {
4450Sstevel@tonic-gate 		mutex_init(&ipcl_udp_fanout[i].connf_lock, NULL,
4460Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4470Sstevel@tonic-gate 	}
4480Sstevel@tonic-gate 
4490Sstevel@tonic-gate 	ipcl_raw_fanout = (connf_t *)kmem_zalloc(ipcl_raw_fanout_size *
4500Sstevel@tonic-gate 	    sizeof (*ipcl_raw_fanout), KM_SLEEP);
4510Sstevel@tonic-gate 
4520Sstevel@tonic-gate 	for (i = 0; i < ipcl_raw_fanout_size; i++) {
4530Sstevel@tonic-gate 		mutex_init(&ipcl_raw_fanout[i].connf_lock, NULL,
4540Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4550Sstevel@tonic-gate 	}
4560Sstevel@tonic-gate 
4570Sstevel@tonic-gate 	ipcl_globalhash_fanout = (connf_t *)kmem_zalloc(sizeof (connf_t) *
4580Sstevel@tonic-gate 	    CONN_G_HASH_SIZE, KM_SLEEP);
4590Sstevel@tonic-gate 
4600Sstevel@tonic-gate 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
4610Sstevel@tonic-gate 		mutex_init(&ipcl_globalhash_fanout[i].connf_lock, NULL,
4620Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4630Sstevel@tonic-gate 	}
4640Sstevel@tonic-gate }
4650Sstevel@tonic-gate 
4660Sstevel@tonic-gate void
4670Sstevel@tonic-gate ipcl_destroy(void)
4680Sstevel@tonic-gate {
4690Sstevel@tonic-gate 	int i;
4700Sstevel@tonic-gate 	kmem_cache_destroy(ipcl_conn_cache);
4710Sstevel@tonic-gate 	kmem_cache_destroy(ipcl_tcpconn_cache);
4720Sstevel@tonic-gate 	for (i = 0; i < ipcl_conn_fanout_size; i++)
4730Sstevel@tonic-gate 		mutex_destroy(&ipcl_conn_fanout[i].connf_lock);
4740Sstevel@tonic-gate 	kmem_free(ipcl_conn_fanout, ipcl_conn_fanout_size *
4750Sstevel@tonic-gate 	    sizeof (*ipcl_conn_fanout));
4760Sstevel@tonic-gate 	for (i = 0; i < ipcl_bind_fanout_size; i++)
4770Sstevel@tonic-gate 		mutex_destroy(&ipcl_bind_fanout[i].connf_lock);
4780Sstevel@tonic-gate 	kmem_free(ipcl_bind_fanout, ipcl_bind_fanout_size *
4790Sstevel@tonic-gate 	    sizeof (*ipcl_bind_fanout));
4800Sstevel@tonic-gate 
4810Sstevel@tonic-gate 	for (i = 0; i < A_CNT(ipcl_proto_fanout); i++)
4820Sstevel@tonic-gate 		mutex_destroy(&ipcl_proto_fanout[i].connf_lock);
4830Sstevel@tonic-gate 	for (i = 0; i < A_CNT(ipcl_proto_fanout_v6); i++)
4840Sstevel@tonic-gate 		mutex_destroy(&ipcl_proto_fanout_v6[i].connf_lock);
4850Sstevel@tonic-gate 
4860Sstevel@tonic-gate 	for (i = 0; i < ipcl_udp_fanout_size; i++)
4870Sstevel@tonic-gate 		mutex_destroy(&ipcl_udp_fanout[i].connf_lock);
4880Sstevel@tonic-gate 	kmem_free(ipcl_udp_fanout, ipcl_udp_fanout_size *
4890Sstevel@tonic-gate 	    sizeof (*ipcl_udp_fanout));
4900Sstevel@tonic-gate 
4910Sstevel@tonic-gate 	for (i = 0; i < ipcl_raw_fanout_size; i++)
4920Sstevel@tonic-gate 		mutex_destroy(&ipcl_raw_fanout[i].connf_lock);
4930Sstevel@tonic-gate 	kmem_free(ipcl_raw_fanout, ipcl_raw_fanout_size *
4940Sstevel@tonic-gate 	    sizeof (*ipcl_raw_fanout));
4950Sstevel@tonic-gate 
4960Sstevel@tonic-gate 	kmem_free(ipcl_globalhash_fanout, sizeof (connf_t) * CONN_G_HASH_SIZE);
4970Sstevel@tonic-gate 	mutex_destroy(&rts_clients.connf_lock);
4980Sstevel@tonic-gate }
4990Sstevel@tonic-gate 
5000Sstevel@tonic-gate /*
5010Sstevel@tonic-gate  * conn creation routine. initialize the conn, sets the reference
5020Sstevel@tonic-gate  * and inserts it in the global hash table.
5030Sstevel@tonic-gate  */
5040Sstevel@tonic-gate conn_t *
5050Sstevel@tonic-gate ipcl_conn_create(uint32_t type, int sleep)
5060Sstevel@tonic-gate {
5070Sstevel@tonic-gate 	itc_t	*itc;
5080Sstevel@tonic-gate 	conn_t	*connp;
5090Sstevel@tonic-gate 
5100Sstevel@tonic-gate 	switch (type) {
5110Sstevel@tonic-gate 	case IPCL_TCPCONN:
5120Sstevel@tonic-gate 		if ((itc = kmem_cache_alloc(ipcl_tcpconn_cache,
5130Sstevel@tonic-gate 		    sleep)) == NULL)
5140Sstevel@tonic-gate 			return (NULL);
5150Sstevel@tonic-gate 		connp = &itc->itc_conn;
5160Sstevel@tonic-gate 		connp->conn_ref = 1;
5170Sstevel@tonic-gate 		IPCL_DEBUG_LVL(1,
5180Sstevel@tonic-gate 		    ("ipcl_conn_create: connp = %p tcp (%p)",
5190Sstevel@tonic-gate 		    (void *)connp, (void *)connp->conn_tcp));
5200Sstevel@tonic-gate 		ipcl_globalhash_insert(connp);
5210Sstevel@tonic-gate 		break;
5220Sstevel@tonic-gate 	case IPCL_SCTPCONN:
5230Sstevel@tonic-gate 		if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL)
5240Sstevel@tonic-gate 			return (NULL);
5250Sstevel@tonic-gate 		connp->conn_flags = IPCL_SCTPCONN;
5260Sstevel@tonic-gate 		break;
5270Sstevel@tonic-gate 	case IPCL_IPCCONN:
5280Sstevel@tonic-gate 		connp = kmem_cache_alloc(ipcl_conn_cache, sleep);
5290Sstevel@tonic-gate 		if (connp == NULL)
530741Smasputra 			return (NULL);
5310Sstevel@tonic-gate 		bzero(connp, sizeof (conn_t));
532741Smasputra 		mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
5330Sstevel@tonic-gate 		cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
534741Smasputra 		connp->conn_flags = IPCL_IPCCONN;
5350Sstevel@tonic-gate 		connp->conn_ref = 1;
5360Sstevel@tonic-gate 		IPCL_DEBUG_LVL(1,
5370Sstevel@tonic-gate 		    ("ipcl_conn_create: connp = %p\n", (void *)connp));
5380Sstevel@tonic-gate 		ipcl_globalhash_insert(connp);
5390Sstevel@tonic-gate 		break;
540741Smasputra 	default:
541741Smasputra 		connp = NULL;
542741Smasputra 		ASSERT(0);
5430Sstevel@tonic-gate 	}
5440Sstevel@tonic-gate 
5450Sstevel@tonic-gate 	return (connp);
5460Sstevel@tonic-gate }
5470Sstevel@tonic-gate 
5480Sstevel@tonic-gate void
5490Sstevel@tonic-gate ipcl_conn_destroy(conn_t *connp)
5500Sstevel@tonic-gate {
5510Sstevel@tonic-gate 	mblk_t	*mp;
5520Sstevel@tonic-gate 
5530Sstevel@tonic-gate 	ASSERT(!MUTEX_HELD(&connp->conn_lock));
5540Sstevel@tonic-gate 	ASSERT(connp->conn_ref == 0);
5550Sstevel@tonic-gate 	ASSERT(connp->conn_ire_cache == NULL);
5560Sstevel@tonic-gate 
557*1676Sjpk 	if (connp->conn_peercred != NULL &&
558*1676Sjpk 	    connp->conn_peercred != connp->conn_cred)
559*1676Sjpk 		crfree(connp->conn_peercred);
560*1676Sjpk 	connp->conn_peercred = NULL;
561*1676Sjpk 
562*1676Sjpk 	if (connp->conn_cred != NULL) {
563*1676Sjpk 		crfree(connp->conn_cred);
564*1676Sjpk 		connp->conn_cred = NULL;
565*1676Sjpk 	}
566*1676Sjpk 
5670Sstevel@tonic-gate 	ipcl_globalhash_remove(connp);
5680Sstevel@tonic-gate 
5690Sstevel@tonic-gate 	cv_destroy(&connp->conn_cv);
5700Sstevel@tonic-gate 	if (connp->conn_flags & IPCL_TCPCONN) {
571741Smasputra 		tcp_t	*tcp = connp->conn_tcp;
572741Smasputra 
5730Sstevel@tonic-gate 		mutex_destroy(&connp->conn_lock);
5740Sstevel@tonic-gate 		ASSERT(connp->conn_tcp != NULL);
5750Sstevel@tonic-gate 		tcp_free(tcp);
5760Sstevel@tonic-gate 		mp = tcp->tcp_timercache;
577*1676Sjpk 		tcp->tcp_cred = NULL;
5780Sstevel@tonic-gate 
5790Sstevel@tonic-gate 		if (tcp->tcp_sack_info != NULL) {
5800Sstevel@tonic-gate 			bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t));
5810Sstevel@tonic-gate 			kmem_cache_free(tcp_sack_info_cache,
5820Sstevel@tonic-gate 			    tcp->tcp_sack_info);
5830Sstevel@tonic-gate 		}
5840Sstevel@tonic-gate 		if (tcp->tcp_iphc != NULL) {
5850Sstevel@tonic-gate 			if (tcp->tcp_hdr_grown) {
5860Sstevel@tonic-gate 				kmem_free(tcp->tcp_iphc, tcp->tcp_iphc_len);
5870Sstevel@tonic-gate 			} else {
5880Sstevel@tonic-gate 				bzero(tcp->tcp_iphc, tcp->tcp_iphc_len);
5890Sstevel@tonic-gate 				kmem_cache_free(tcp_iphc_cache, tcp->tcp_iphc);
5900Sstevel@tonic-gate 			}
5910Sstevel@tonic-gate 			tcp->tcp_iphc_len = 0;
5920Sstevel@tonic-gate 		}
5930Sstevel@tonic-gate 		ASSERT(tcp->tcp_iphc_len == 0);
5940Sstevel@tonic-gate 
5950Sstevel@tonic-gate 		if (connp->conn_latch != NULL)
5960Sstevel@tonic-gate 			IPLATCH_REFRELE(connp->conn_latch);
5970Sstevel@tonic-gate 		if (connp->conn_policy != NULL)
5980Sstevel@tonic-gate 			IPPH_REFRELE(connp->conn_policy);
5990Sstevel@tonic-gate 		bzero(connp, sizeof (itc_t));
6000Sstevel@tonic-gate 
6010Sstevel@tonic-gate 		tcp->tcp_timercache = mp;
6020Sstevel@tonic-gate 		connp->conn_tcp = tcp;
6030Sstevel@tonic-gate 		connp->conn_flags = IPCL_TCPCONN;
6040Sstevel@tonic-gate 		connp->conn_ulp = IPPROTO_TCP;
6050Sstevel@tonic-gate 		tcp->tcp_connp = connp;
6060Sstevel@tonic-gate 		kmem_cache_free(ipcl_tcpconn_cache, connp);
6070Sstevel@tonic-gate 	} else if (connp->conn_flags & IPCL_SCTPCONN) {
6080Sstevel@tonic-gate 		sctp_free(connp);
6090Sstevel@tonic-gate 	} else {
610741Smasputra 		ASSERT(connp->conn_udp == NULL);
6110Sstevel@tonic-gate 		mutex_destroy(&connp->conn_lock);
6120Sstevel@tonic-gate 		kmem_cache_free(ipcl_conn_cache, connp);
6130Sstevel@tonic-gate 	}
6140Sstevel@tonic-gate }
6150Sstevel@tonic-gate 
6160Sstevel@tonic-gate /*
6170Sstevel@tonic-gate  * Running in cluster mode - deregister listener information
6180Sstevel@tonic-gate  */
6190Sstevel@tonic-gate 
6200Sstevel@tonic-gate static void
6210Sstevel@tonic-gate ipcl_conn_unlisten(conn_t *connp)
6220Sstevel@tonic-gate {
6230Sstevel@tonic-gate 	ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0);
6240Sstevel@tonic-gate 	ASSERT(connp->conn_lport != 0);
6250Sstevel@tonic-gate 
6260Sstevel@tonic-gate 	if (cl_inet_unlisten != NULL) {
6270Sstevel@tonic-gate 		sa_family_t	addr_family;
6280Sstevel@tonic-gate 		uint8_t		*laddrp;
6290Sstevel@tonic-gate 
6300Sstevel@tonic-gate 		if (connp->conn_pkt_isv6) {
6310Sstevel@tonic-gate 			addr_family = AF_INET6;
6320Sstevel@tonic-gate 			laddrp = (uint8_t *)&connp->conn_bound_source_v6;
6330Sstevel@tonic-gate 		} else {
6340Sstevel@tonic-gate 			addr_family = AF_INET;
6350Sstevel@tonic-gate 			laddrp = (uint8_t *)&connp->conn_bound_source;
6360Sstevel@tonic-gate 		}
6370Sstevel@tonic-gate 		(*cl_inet_unlisten)(IPPROTO_TCP, addr_family, laddrp,
6380Sstevel@tonic-gate 		    connp->conn_lport);
6390Sstevel@tonic-gate 	}
6400Sstevel@tonic-gate 	connp->conn_flags &= ~IPCL_CL_LISTENER;
6410Sstevel@tonic-gate }
6420Sstevel@tonic-gate 
/*
 * Unlink a conn from the fanout bucket recorded in conn_fanout; a conn that
 * is not on any bucket (conn_fanout == NULL) is left untouched.
 *
 * The bits in conn_flags that say which table the conn was in are left alone
 * and IPCL_REMOVED is set on top of them, so that a debugger can still tell
 * which hash table the connection used to live in.  While the bucket lock is
 * held the macro also deregisters a cluster listener (IPCL_CL_LISTENER) and
 * drops one conn reference via CONN_DEC_REF.  The caller must not hold
 * conn_lock.
 */
#define	IPCL_HASH_REMOVE(connp)	{					\
	connf_t	*connfp = (connp)->conn_fanout;				\
	ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));			\
	if (connfp != NULL) {						\
		conn_t	*hr_next, *hr_prev;				\
		IPCL_DEBUG_LVL(4, ("IPCL_HASH_REMOVE: connp %p",	\
		    (void *)(connp)));					\
		mutex_enter(&connfp->connf_lock);			\
		hr_next = (connp)->conn_next;				\
		hr_prev = (connp)->conn_prev;				\
		if (hr_next != NULL)					\
			hr_next->conn_prev = hr_prev;			\
		if (hr_prev == NULL)					\
			connfp->connf_head = hr_next;			\
		else							\
			hr_prev->conn_next = hr_next;			\
		(connp)->conn_fanout = NULL;				\
		(connp)->conn_next = NULL;				\
		(connp)->conn_prev = NULL;				\
		(connp)->conn_flags |= IPCL_REMOVED;			\
		if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0)	\
			ipcl_conn_unlisten((connp));			\
		CONN_DEC_REF((connp));					\
		mutex_exit(&connfp->connf_lock);			\
	}								\
}
6730Sstevel@tonic-gate 
6740Sstevel@tonic-gate void
6750Sstevel@tonic-gate ipcl_hash_remove(conn_t *connp)
6760Sstevel@tonic-gate {
6770Sstevel@tonic-gate 	IPCL_HASH_REMOVE(connp);
6780Sstevel@tonic-gate }
6790Sstevel@tonic-gate 
6800Sstevel@tonic-gate /*
6810Sstevel@tonic-gate  * The whole purpose of this function is allow removal of
6820Sstevel@tonic-gate  * a conn_t from the connected hash for timewait reclaim.
6830Sstevel@tonic-gate  * This is essentially a TW reclaim fastpath where timewait
6840Sstevel@tonic-gate  * collector checks under fanout lock (so no one else can
6850Sstevel@tonic-gate  * get access to the conn_t) that refcnt is 2 i.e. one for
6860Sstevel@tonic-gate  * TCP and one for the classifier hash list. If ref count
6870Sstevel@tonic-gate  * is indeed 2, we can just remove the conn under lock and
6880Sstevel@tonic-gate  * avoid cleaning up the conn under squeue. This gives us
6890Sstevel@tonic-gate  * improved performance.
6900Sstevel@tonic-gate  */
6910Sstevel@tonic-gate void
6920Sstevel@tonic-gate ipcl_hash_remove_locked(conn_t *connp, connf_t	*connfp)
6930Sstevel@tonic-gate {
6940Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connfp->connf_lock));
6950Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connp->conn_lock));
6960Sstevel@tonic-gate 	ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0);
6970Sstevel@tonic-gate 
6980Sstevel@tonic-gate 	if ((connp)->conn_next != NULL) {
6990Sstevel@tonic-gate 		(connp)->conn_next->conn_prev =
7000Sstevel@tonic-gate 			(connp)->conn_prev;
7010Sstevel@tonic-gate 	}
7020Sstevel@tonic-gate 	if ((connp)->conn_prev != NULL) {
7030Sstevel@tonic-gate 		(connp)->conn_prev->conn_next =
7040Sstevel@tonic-gate 			(connp)->conn_next;
7050Sstevel@tonic-gate 	} else {
7060Sstevel@tonic-gate 		connfp->connf_head = (connp)->conn_next;
7070Sstevel@tonic-gate 	}
7080Sstevel@tonic-gate 	(connp)->conn_fanout = NULL;
7090Sstevel@tonic-gate 	(connp)->conn_next = NULL;
7100Sstevel@tonic-gate 	(connp)->conn_prev = NULL;
7110Sstevel@tonic-gate 	(connp)->conn_flags |= IPCL_REMOVED;
7120Sstevel@tonic-gate 	ASSERT((connp)->conn_ref == 2);
7130Sstevel@tonic-gate 	(connp)->conn_ref--;
7140Sstevel@tonic-gate }
7150Sstevel@tonic-gate 
/*
 * Link connp at the head of bucket connfp, tag it IPCL_CONNECTED (clearing
 * IPCL_REMOVED) and take a conn reference for the hash list.  The conn must
 * not be on any fanout list.  This macro does no locking of its own; the
 * callers in this file invoke it with connfp->connf_lock held.
 */
#define	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) {		\
	conn_t	*ic_head = (connfp)->connf_head;			\
	ASSERT((connp)->conn_fanout == NULL);				\
	ASSERT((connp)->conn_next == NULL);				\
	ASSERT((connp)->conn_prev == NULL);				\
	if (ic_head != NULL) {						\
		ic_head->conn_prev = (connp);				\
		(connp)->conn_next = ic_head;				\
	}								\
	(connp)->conn_fanout = (connfp);				\
	(connfp)->connf_head = (connp);					\
	(connp)->conn_flags = IPCL_CONNECTED |				\
	    ((connp)->conn_flags & ~IPCL_REMOVED);			\
	CONN_INC_REF(connp);						\
}
7300Sstevel@tonic-gate 
/*
 * Move connp to the head of bucket connfp as a connected entry: first take
 * it off any bucket it is currently on, then insert it under connf_lock.
 */
#define	IPCL_HASH_INSERT_CONNECTED(connfp, connp) {			\
	IPCL_DEBUG_LVL(8, ("IPCL_HASH_INSERT_CONNECTED: connfp %p "	\
	    "connp %p", (void *)(connfp), (void *)(connp)));		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	IPCL_HASH_INSERT_CONNECTED_LOCKED((connfp), (connp));		\
	mutex_exit(&(connfp)->connf_lock);				\
}
7390Sstevel@tonic-gate 
/*
 * Insert connp into bucket connfp as a bound (IPCL_BOUND) entry.
 *
 * The conn is first removed from any bucket it is on.  The list is then
 * walked until the first entry whose conn_srcv6 satisfies _IPCL_V4_MATCH_ANY
 * (presumably an "any address" source -- confirm against the macro), and
 * connp is linked in just ahead of that entry, or at the tail when there is
 * no such entry.  A conn reference is taken for the hash list.
 */
#define	IPCL_HASH_INSERT_BOUND(connfp, connp) {				\
	conn_t	*ib_prev = NULL;					\
	conn_t	*ib_next;						\
	IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_BOUND: connfp %p "	\
	    "connp %p", (void *)(connfp), (void *)(connp)));		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	for (ib_next = (connfp)->connf_head;				\
	    ib_next != NULL &&						\
	    !_IPCL_V4_MATCH_ANY(ib_next->conn_srcv6);			\
	    ib_next = ib_next->conn_next) {				\
		ib_prev = ib_next;					\
	}								\
	if (ib_prev == NULL) {						\
		(connfp)->connf_head = (connp);				\
	} else {							\
		ib_prev->conn_next = (connp);				\
		(connp)->conn_prev = ib_prev;				\
	}								\
	if (ib_next != NULL) {						\
		(connp)->conn_next = ib_next;				\
		ib_next->conn_prev = (connp);				\
	}								\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = IPCL_BOUND |				\
	    ((connp)->conn_flags & ~IPCL_REMOVED);			\
	CONN_INC_REF(connp);						\
	mutex_exit(&(connfp)->connf_lock);				\
}
7680Sstevel@tonic-gate 
/*
 * Insert connp into bucket connfp as a wildcard entry (flagged IPCL_BOUND).
 *
 * The conn is first removed from any bucket it is on.  It is then normally
 * appended at the tail of the list.  The exception: when connp's source
 * address is v4-mapped, it is placed immediately ahead of the first entry in
 * the same zone whose source is the unspecified IPv6 address.
 *
 * While walking, "prev" always trails "next" by one node, so at the point of
 * insertion it already is the correct predecessor for connp; no re-read of
 * next->conn_prev is required.  A conn reference is taken for the hash list.
 * Every use of the connp argument is parenthesized so the macro is safe for
 * arbitrary pointer expressions.
 */
#define	IPCL_HASH_INSERT_WILDCARD(connfp, connp) {			\
	conn_t **list, *prev, *next;					\
	boolean_t isv4mapped =						\
	    IN6_IS_ADDR_V4MAPPED(&(connp)->conn_srcv6);			\
	IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_WILDCARD: connfp %p "	\
	    "connp %p", (void *)(connfp), (void *)(connp)));		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	list = &(connfp)->connf_head;					\
	prev = NULL;							\
	while ((next = *list) != NULL) {				\
		if (isv4mapped &&					\
		    IN6_IS_ADDR_UNSPECIFIED(&next->conn_srcv6) &&	\
		    (connp)->conn_zoneid == next->conn_zoneid) {	\
			(connp)->conn_next = next;			\
			next->conn_prev = (connp);			\
			break;						\
		}							\
		list = &next->conn_next;				\
		prev = next;						\
	}								\
	(connp)->conn_prev = prev;					\
	*list = (connp);						\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF((connp));						\
	mutex_exit(&(connfp)->connf_lock);				\
}
8000Sstevel@tonic-gate 
8010Sstevel@tonic-gate void
8020Sstevel@tonic-gate ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp)
8030Sstevel@tonic-gate {
804*1676Sjpk 	ASSERT(!connp->conn_mac_exempt);
8050Sstevel@tonic-gate 	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
8060Sstevel@tonic-gate }
8070Sstevel@tonic-gate 
8080Sstevel@tonic-gate void
8090Sstevel@tonic-gate ipcl_proto_insert(conn_t *connp, uint8_t protocol)
8100Sstevel@tonic-gate {
8110Sstevel@tonic-gate 	connf_t	*connfp;
8120Sstevel@tonic-gate 
8130Sstevel@tonic-gate 	ASSERT(connp != NULL);
814*1676Sjpk 	ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH ||
815*1676Sjpk 	    protocol == IPPROTO_ESP);
8160Sstevel@tonic-gate 
8170Sstevel@tonic-gate 	connp->conn_ulp = protocol;
8180Sstevel@tonic-gate 
8190Sstevel@tonic-gate 	/* Insert it in the protocol hash */
8200Sstevel@tonic-gate 	connfp = &ipcl_proto_fanout[protocol];
8210Sstevel@tonic-gate 	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
8220Sstevel@tonic-gate }
8230Sstevel@tonic-gate 
8240Sstevel@tonic-gate void
8250Sstevel@tonic-gate ipcl_proto_insert_v6(conn_t *connp, uint8_t protocol)
8260Sstevel@tonic-gate {
8270Sstevel@tonic-gate 	connf_t	*connfp;
8280Sstevel@tonic-gate 
8290Sstevel@tonic-gate 	ASSERT(connp != NULL);
830*1676Sjpk 	ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH ||
831*1676Sjpk 	    protocol == IPPROTO_ESP);
8320Sstevel@tonic-gate 
8330Sstevel@tonic-gate 	connp->conn_ulp = protocol;
8340Sstevel@tonic-gate 
8350Sstevel@tonic-gate 	/* Insert it in the Bind Hash */
8360Sstevel@tonic-gate 	connfp = &ipcl_proto_fanout_v6[protocol];
8370Sstevel@tonic-gate 	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
8380Sstevel@tonic-gate }
8390Sstevel@tonic-gate 
8400Sstevel@tonic-gate /*
8410Sstevel@tonic-gate  * This function is used only for inserting SCTP raw socket now.
8420Sstevel@tonic-gate  * This may change later.
8430Sstevel@tonic-gate  *
8440Sstevel@tonic-gate  * Note that only one raw socket can be bound to a port.  The param
8450Sstevel@tonic-gate  * lport is in network byte order.
8460Sstevel@tonic-gate  */
8470Sstevel@tonic-gate static int
8480Sstevel@tonic-gate ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport)
8490Sstevel@tonic-gate {
8500Sstevel@tonic-gate 	connf_t	*connfp;
8510Sstevel@tonic-gate 	conn_t	*oconnp;
8520Sstevel@tonic-gate 
8530Sstevel@tonic-gate 	connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport))];
8540Sstevel@tonic-gate 
8550Sstevel@tonic-gate 	/* Check for existing raw socket already bound to the port. */
8560Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
8570Sstevel@tonic-gate 	for (oconnp = connfp->connf_head; oconnp != NULL;
858409Skcpoon 	    oconnp = oconnp->conn_next) {
8590Sstevel@tonic-gate 		if (oconnp->conn_lport == lport &&
8600Sstevel@tonic-gate 		    oconnp->conn_zoneid == connp->conn_zoneid &&
8610Sstevel@tonic-gate 		    oconnp->conn_af_isv6 == connp->conn_af_isv6 &&
8620Sstevel@tonic-gate 		    ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) ||
8630Sstevel@tonic-gate 		    IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_srcv6) ||
8640Sstevel@tonic-gate 		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6) ||
8650Sstevel@tonic-gate 		    IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_srcv6)) ||
8660Sstevel@tonic-gate 		    IN6_ARE_ADDR_EQUAL(&oconnp->conn_srcv6,
8670Sstevel@tonic-gate 		    &connp->conn_srcv6))) {
8680Sstevel@tonic-gate 			break;
8690Sstevel@tonic-gate 		}
8700Sstevel@tonic-gate 	}
8710Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
8720Sstevel@tonic-gate 	if (oconnp != NULL)
8730Sstevel@tonic-gate 		return (EADDRNOTAVAIL);
8740Sstevel@tonic-gate 
8750Sstevel@tonic-gate 	if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) ||
8760Sstevel@tonic-gate 	    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_remv6)) {
8770Sstevel@tonic-gate 		if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) ||
8780Sstevel@tonic-gate 		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6)) {
8790Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
8800Sstevel@tonic-gate 		} else {
8810Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
8820Sstevel@tonic-gate 		}
8830Sstevel@tonic-gate 	} else {
8840Sstevel@tonic-gate 		IPCL_HASH_INSERT_CONNECTED(connfp, connp);
8850Sstevel@tonic-gate 	}
8860Sstevel@tonic-gate 	return (0);
8870Sstevel@tonic-gate }
8880Sstevel@tonic-gate 
8890Sstevel@tonic-gate /*
890*1676Sjpk  * Check for a MAC exemption conflict on a labeled system.  Note that for
891*1676Sjpk  * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the
892*1676Sjpk  * transport layer.  This check is for binding all other protocols.
893*1676Sjpk  *
894*1676Sjpk  * Returns true if there's a conflict.
895*1676Sjpk  */
896*1676Sjpk static boolean_t
897*1676Sjpk check_exempt_conflict_v4(conn_t *connp)
898*1676Sjpk {
899*1676Sjpk 	connf_t	*connfp;
900*1676Sjpk 	conn_t *tconn;
901*1676Sjpk 
902*1676Sjpk 	connfp = &ipcl_proto_fanout[connp->conn_ulp];
903*1676Sjpk 	mutex_enter(&connfp->connf_lock);
904*1676Sjpk 	for (tconn = connfp->connf_head; tconn != NULL;
905*1676Sjpk 	    tconn = tconn->conn_next) {
906*1676Sjpk 		/* We don't allow v4 fallback for v6 raw socket */
907*1676Sjpk 		if (connp->conn_af_isv6 != tconn->conn_af_isv6)
908*1676Sjpk 			continue;
909*1676Sjpk 		/* If neither is exempt, then there's no conflict */
910*1676Sjpk 		if (!connp->conn_mac_exempt && !tconn->conn_mac_exempt)
911*1676Sjpk 			continue;
912*1676Sjpk 		/* If both are bound to different specific addrs, ok */
913*1676Sjpk 		if (connp->conn_src != INADDR_ANY &&
914*1676Sjpk 		    tconn->conn_src != INADDR_ANY &&
915*1676Sjpk 		    connp->conn_src != tconn->conn_src)
916*1676Sjpk 			continue;
917*1676Sjpk 		/* These two conflict; fail */
918*1676Sjpk 		break;
919*1676Sjpk 	}
920*1676Sjpk 	mutex_exit(&connfp->connf_lock);
921*1676Sjpk 	return (tconn != NULL);
922*1676Sjpk }
923*1676Sjpk 
924*1676Sjpk static boolean_t
925*1676Sjpk check_exempt_conflict_v6(conn_t *connp)
926*1676Sjpk {
927*1676Sjpk 	connf_t	*connfp;
928*1676Sjpk 	conn_t *tconn;
929*1676Sjpk 
930*1676Sjpk 	connfp = &ipcl_proto_fanout[connp->conn_ulp];
931*1676Sjpk 	mutex_enter(&connfp->connf_lock);
932*1676Sjpk 	for (tconn = connfp->connf_head; tconn != NULL;
933*1676Sjpk 	    tconn = tconn->conn_next) {
934*1676Sjpk 		/* We don't allow v4 fallback for v6 raw socket */
935*1676Sjpk 		if (connp->conn_af_isv6 != tconn->conn_af_isv6)
936*1676Sjpk 			continue;
937*1676Sjpk 		/* If neither is exempt, then there's no conflict */
938*1676Sjpk 		if (!connp->conn_mac_exempt && !tconn->conn_mac_exempt)
939*1676Sjpk 			continue;
940*1676Sjpk 		/* If both are bound to different addrs, ok */
941*1676Sjpk 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) &&
942*1676Sjpk 		    !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_srcv6) &&
943*1676Sjpk 		    !IN6_ARE_ADDR_EQUAL(&connp->conn_srcv6, &tconn->conn_srcv6))
944*1676Sjpk 			continue;
945*1676Sjpk 		/* These two conflict; fail */
946*1676Sjpk 		break;
947*1676Sjpk 	}
948*1676Sjpk 	mutex_exit(&connfp->connf_lock);
949*1676Sjpk 	return (tconn != NULL);
950*1676Sjpk }
951*1676Sjpk 
952*1676Sjpk /*
9530Sstevel@tonic-gate  * (v4, v6) bind hash insertion routines
9540Sstevel@tonic-gate  */
9550Sstevel@tonic-gate int
9560Sstevel@tonic-gate ipcl_bind_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, uint16_t lport)
9570Sstevel@tonic-gate {
9580Sstevel@tonic-gate 	connf_t	*connfp;
9590Sstevel@tonic-gate #ifdef	IPCL_DEBUG
9600Sstevel@tonic-gate 	char	buf[INET_NTOA_BUFSIZE];
9610Sstevel@tonic-gate #endif
9620Sstevel@tonic-gate 	int	ret = 0;
9630Sstevel@tonic-gate 
9640Sstevel@tonic-gate 	ASSERT(connp);
9650Sstevel@tonic-gate 
9660Sstevel@tonic-gate 	IPCL_DEBUG_LVL(64, ("ipcl_bind_insert: connp %p, src = %s, "
9670Sstevel@tonic-gate 	    "port = %d\n", (void *)connp, inet_ntoa_r(src, buf), lport));
9680Sstevel@tonic-gate 
9690Sstevel@tonic-gate 	connp->conn_ulp = protocol;
9700Sstevel@tonic-gate 	IN6_IPADDR_TO_V4MAPPED(src, &connp->conn_srcv6);
9710Sstevel@tonic-gate 	connp->conn_lport = lport;
9720Sstevel@tonic-gate 
9730Sstevel@tonic-gate 	switch (protocol) {
974*1676Sjpk 	default:
975*1676Sjpk 		if (is_system_labeled() && check_exempt_conflict_v4(connp))
976*1676Sjpk 			return (EADDRINUSE);
977*1676Sjpk 		/* FALLTHROUGH */
9780Sstevel@tonic-gate 	case IPPROTO_UDP:
9790Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
9800Sstevel@tonic-gate 			IPCL_DEBUG_LVL(64,
9810Sstevel@tonic-gate 			    ("ipcl_bind_insert: connp %p - udp\n",
9820Sstevel@tonic-gate 			    (void *)connp));
9830Sstevel@tonic-gate 			connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)];
9840Sstevel@tonic-gate 		} else {
9850Sstevel@tonic-gate 			IPCL_DEBUG_LVL(64,
9860Sstevel@tonic-gate 			    ("ipcl_bind_insert: connp %p - protocol\n",
9870Sstevel@tonic-gate 			    (void *)connp));
9880Sstevel@tonic-gate 			connfp = &ipcl_proto_fanout[protocol];
9890Sstevel@tonic-gate 		}
9900Sstevel@tonic-gate 
9910Sstevel@tonic-gate 		if (connp->conn_rem != INADDR_ANY) {
9920Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
9930Sstevel@tonic-gate 		} else if (connp->conn_src != INADDR_ANY) {
9940Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
9950Sstevel@tonic-gate 		} else {
9960Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
9970Sstevel@tonic-gate 		}
9980Sstevel@tonic-gate 		break;
9990Sstevel@tonic-gate 
10000Sstevel@tonic-gate 	case IPPROTO_TCP:
10010Sstevel@tonic-gate 
10020Sstevel@tonic-gate 		/* Insert it in the Bind Hash */
1003*1676Sjpk 		ASSERT(connp->conn_zoneid != ALL_ZONES);
10040Sstevel@tonic-gate 		connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)];
10050Sstevel@tonic-gate 		if (connp->conn_src != INADDR_ANY) {
10060Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
10070Sstevel@tonic-gate 		} else {
10080Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
10090Sstevel@tonic-gate 		}
10100Sstevel@tonic-gate 		if (cl_inet_listen != NULL) {
10110Sstevel@tonic-gate 			ASSERT(!connp->conn_pkt_isv6);
10120Sstevel@tonic-gate 			connp->conn_flags |= IPCL_CL_LISTENER;
10130Sstevel@tonic-gate 			(*cl_inet_listen)(IPPROTO_TCP, AF_INET,
10140Sstevel@tonic-gate 			    (uint8_t *)&connp->conn_bound_source, lport);
10150Sstevel@tonic-gate 		}
10160Sstevel@tonic-gate 		break;
10170Sstevel@tonic-gate 
10180Sstevel@tonic-gate 	case IPPROTO_SCTP:
10190Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
10200Sstevel@tonic-gate 		break;
10210Sstevel@tonic-gate 	}
10220Sstevel@tonic-gate 
10230Sstevel@tonic-gate 	return (ret);
10240Sstevel@tonic-gate }
10250Sstevel@tonic-gate 
10260Sstevel@tonic-gate int
10270Sstevel@tonic-gate ipcl_bind_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src,
10280Sstevel@tonic-gate     uint16_t lport)
10290Sstevel@tonic-gate {
10300Sstevel@tonic-gate 	connf_t	*connfp;
10310Sstevel@tonic-gate 	int	ret = 0;
10320Sstevel@tonic-gate 
10330Sstevel@tonic-gate 	ASSERT(connp);
10340Sstevel@tonic-gate 
10350Sstevel@tonic-gate 	connp->conn_ulp = protocol;
10360Sstevel@tonic-gate 	connp->conn_srcv6 = *src;
10370Sstevel@tonic-gate 	connp->conn_lport = lport;
10380Sstevel@tonic-gate 
10390Sstevel@tonic-gate 	switch (protocol) {
1040*1676Sjpk 	default:
1041*1676Sjpk 		if (is_system_labeled() && check_exempt_conflict_v6(connp))
1042*1676Sjpk 			return (EADDRINUSE);
1043*1676Sjpk 		/* FALLTHROUGH */
10440Sstevel@tonic-gate 	case IPPROTO_UDP:
10450Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
10460Sstevel@tonic-gate 			IPCL_DEBUG_LVL(128,
10470Sstevel@tonic-gate 			    ("ipcl_bind_insert_v6: connp %p - udp\n",
10480Sstevel@tonic-gate 			    (void *)connp));
10490Sstevel@tonic-gate 			connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)];
10500Sstevel@tonic-gate 		} else {
10510Sstevel@tonic-gate 			IPCL_DEBUG_LVL(128,
10520Sstevel@tonic-gate 			    ("ipcl_bind_insert_v6: connp %p - protocol\n",
10530Sstevel@tonic-gate 			    (void *)connp));
10540Sstevel@tonic-gate 			connfp = &ipcl_proto_fanout_v6[protocol];
10550Sstevel@tonic-gate 		}
10560Sstevel@tonic-gate 
10570Sstevel@tonic-gate 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) {
10580Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
10590Sstevel@tonic-gate 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
10600Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
10610Sstevel@tonic-gate 		} else {
10620Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
10630Sstevel@tonic-gate 		}
10640Sstevel@tonic-gate 		break;
10650Sstevel@tonic-gate 
10660Sstevel@tonic-gate 	case IPPROTO_TCP:
10670Sstevel@tonic-gate 		/* XXX - Need a separate table for IN6_IS_ADDR_UNSPECIFIED? */
10680Sstevel@tonic-gate 
10690Sstevel@tonic-gate 		/* Insert it in the Bind Hash */
1070*1676Sjpk 		ASSERT(connp->conn_zoneid != ALL_ZONES);
10710Sstevel@tonic-gate 		connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)];
10720Sstevel@tonic-gate 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
10730Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
10740Sstevel@tonic-gate 		} else {
10750Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
10760Sstevel@tonic-gate 		}
10770Sstevel@tonic-gate 		if (cl_inet_listen != NULL) {
10780Sstevel@tonic-gate 			sa_family_t	addr_family;
10790Sstevel@tonic-gate 			uint8_t		*laddrp;
10800Sstevel@tonic-gate 
10810Sstevel@tonic-gate 			if (connp->conn_pkt_isv6) {
10820Sstevel@tonic-gate 				addr_family = AF_INET6;
10830Sstevel@tonic-gate 				laddrp =
10840Sstevel@tonic-gate 				    (uint8_t *)&connp->conn_bound_source_v6;
10850Sstevel@tonic-gate 			} else {
10860Sstevel@tonic-gate 				addr_family = AF_INET;
10870Sstevel@tonic-gate 				laddrp = (uint8_t *)&connp->conn_bound_source;
10880Sstevel@tonic-gate 			}
10890Sstevel@tonic-gate 			connp->conn_flags |= IPCL_CL_LISTENER;
10900Sstevel@tonic-gate 			(*cl_inet_listen)(IPPROTO_TCP, addr_family, laddrp,
10910Sstevel@tonic-gate 			    lport);
10920Sstevel@tonic-gate 		}
10930Sstevel@tonic-gate 		break;
10940Sstevel@tonic-gate 
10950Sstevel@tonic-gate 	case IPPROTO_SCTP:
10960Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
10970Sstevel@tonic-gate 		break;
10980Sstevel@tonic-gate 	}
10990Sstevel@tonic-gate 
11000Sstevel@tonic-gate 	return (ret);
11010Sstevel@tonic-gate }
11020Sstevel@tonic-gate 
11030Sstevel@tonic-gate /*
11040Sstevel@tonic-gate  * ipcl_conn_hash insertion routines.
11050Sstevel@tonic-gate  */
11060Sstevel@tonic-gate int
11070Sstevel@tonic-gate ipcl_conn_insert(conn_t *connp, uint8_t protocol, ipaddr_t src,
11080Sstevel@tonic-gate     ipaddr_t rem, uint32_t ports)
11090Sstevel@tonic-gate {
11100Sstevel@tonic-gate 	connf_t		*connfp;
11110Sstevel@tonic-gate 	uint16_t	*up;
11120Sstevel@tonic-gate 	conn_t		*tconnp;
11130Sstevel@tonic-gate #ifdef	IPCL_DEBUG
11140Sstevel@tonic-gate 	char	sbuf[INET_NTOA_BUFSIZE], rbuf[INET_NTOA_BUFSIZE];
11150Sstevel@tonic-gate #endif
11160Sstevel@tonic-gate 	in_port_t	lport;
11170Sstevel@tonic-gate 	int		ret = 0;
11180Sstevel@tonic-gate 
11190Sstevel@tonic-gate 	IPCL_DEBUG_LVL(256, ("ipcl_conn_insert: connp %p, src = %s, "
11200Sstevel@tonic-gate 	    "dst = %s, ports = %x, protocol = %x", (void *)connp,
11210Sstevel@tonic-gate 	    inet_ntoa_r(src, sbuf), inet_ntoa_r(rem, rbuf),
11220Sstevel@tonic-gate 	    ports, protocol));
11230Sstevel@tonic-gate 
11240Sstevel@tonic-gate 	switch (protocol) {
11250Sstevel@tonic-gate 	case IPPROTO_TCP:
11260Sstevel@tonic-gate 		if (!(connp->conn_flags & IPCL_EAGER)) {
11270Sstevel@tonic-gate 			/*
11280Sstevel@tonic-gate 			 * for a eager connection, i.e connections which
11290Sstevel@tonic-gate 			 * have just been created, the initialization is
11300Sstevel@tonic-gate 			 * already done in ip at conn_creation time, so
11310Sstevel@tonic-gate 			 * we can skip the checks here.
11320Sstevel@tonic-gate 			 */
11330Sstevel@tonic-gate 			IPCL_CONN_INIT(connp, protocol, src, rem, ports);
11340Sstevel@tonic-gate 		}
11350Sstevel@tonic-gate 		connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(connp->conn_rem,
11360Sstevel@tonic-gate 		    connp->conn_ports)];
11370Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
11380Sstevel@tonic-gate 		for (tconnp = connfp->connf_head; tconnp != NULL;
11390Sstevel@tonic-gate 		    tconnp = tconnp->conn_next) {
11400Sstevel@tonic-gate 			if (IPCL_CONN_MATCH(tconnp, connp->conn_ulp,
11410Sstevel@tonic-gate 			    connp->conn_rem, connp->conn_src,
11420Sstevel@tonic-gate 			    connp->conn_ports)) {
11430Sstevel@tonic-gate 
11440Sstevel@tonic-gate 				/* Already have a conn. bail out */
11450Sstevel@tonic-gate 				mutex_exit(&connfp->connf_lock);
11460Sstevel@tonic-gate 				return (EADDRINUSE);
11470Sstevel@tonic-gate 			}
11480Sstevel@tonic-gate 		}
11490Sstevel@tonic-gate 		if (connp->conn_fanout != NULL) {
11500Sstevel@tonic-gate 			/*
11510Sstevel@tonic-gate 			 * Probably a XTI/TLI application trying to do a
11520Sstevel@tonic-gate 			 * rebind. Let it happen.
11530Sstevel@tonic-gate 			 */
11540Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
11550Sstevel@tonic-gate 			IPCL_HASH_REMOVE(connp);
11560Sstevel@tonic-gate 			mutex_enter(&connfp->connf_lock);
11570Sstevel@tonic-gate 		}
11580Sstevel@tonic-gate 		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
11590Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
11600Sstevel@tonic-gate 		break;
11610Sstevel@tonic-gate 
11620Sstevel@tonic-gate 	case IPPROTO_SCTP:
1163409Skcpoon 		/*
1164409Skcpoon 		 * The raw socket may have already been bound, remove it
1165409Skcpoon 		 * from the hash first.
1166409Skcpoon 		 */
1167409Skcpoon 		IPCL_HASH_REMOVE(connp);
1168409Skcpoon 		lport = htons((uint16_t)(ntohl(ports) & 0xFFFF));
11690Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
11700Sstevel@tonic-gate 		break;
11710Sstevel@tonic-gate 
1172*1676Sjpk 	default:
1173*1676Sjpk 		/*
1174*1676Sjpk 		 * Check for conflicts among MAC exempt bindings.  For
1175*1676Sjpk 		 * transports with port numbers, this is done by the upper
1176*1676Sjpk 		 * level per-transport binding logic.  For all others, it's
1177*1676Sjpk 		 * done here.
1178*1676Sjpk 		 */
1179*1676Sjpk 		if (is_system_labeled() && check_exempt_conflict_v4(connp))
1180*1676Sjpk 			return (EADDRINUSE);
1181*1676Sjpk 		/* FALLTHROUGH */
1182*1676Sjpk 
11830Sstevel@tonic-gate 	case IPPROTO_UDP:
11840Sstevel@tonic-gate 		up = (uint16_t *)&ports;
11850Sstevel@tonic-gate 		IPCL_CONN_INIT(connp, protocol, src, rem, ports);
11860Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
11870Sstevel@tonic-gate 			connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(up[1])];
11880Sstevel@tonic-gate 		} else {
11890Sstevel@tonic-gate 			connfp = &ipcl_proto_fanout[protocol];
11900Sstevel@tonic-gate 		}
11910Sstevel@tonic-gate 
11920Sstevel@tonic-gate 		if (connp->conn_rem != INADDR_ANY) {
11930Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
11940Sstevel@tonic-gate 		} else if (connp->conn_src != INADDR_ANY) {
11950Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
11960Sstevel@tonic-gate 		} else {
11970Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
11980Sstevel@tonic-gate 		}
11990Sstevel@tonic-gate 		break;
12000Sstevel@tonic-gate 	}
12010Sstevel@tonic-gate 
12020Sstevel@tonic-gate 	return (ret);
12030Sstevel@tonic-gate }
12040Sstevel@tonic-gate 
12050Sstevel@tonic-gate int
12060Sstevel@tonic-gate ipcl_conn_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src,
12070Sstevel@tonic-gate     const in6_addr_t *rem, uint32_t ports, uint_t ifindex)
12080Sstevel@tonic-gate {
12090Sstevel@tonic-gate 	connf_t		*connfp;
12100Sstevel@tonic-gate 	uint16_t	*up;
12110Sstevel@tonic-gate 	conn_t		*tconnp;
12120Sstevel@tonic-gate 	in_port_t	lport;
12130Sstevel@tonic-gate 	int		ret = 0;
12140Sstevel@tonic-gate 
12150Sstevel@tonic-gate 	switch (protocol) {
12160Sstevel@tonic-gate 	case IPPROTO_TCP:
12170Sstevel@tonic-gate 		/* Just need to insert a conn struct */
12180Sstevel@tonic-gate 		if (!(connp->conn_flags & IPCL_EAGER)) {
12190Sstevel@tonic-gate 			IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports);
12200Sstevel@tonic-gate 		}
12210Sstevel@tonic-gate 		connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(connp->conn_remv6,
12220Sstevel@tonic-gate 		    connp->conn_ports)];
12230Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
12240Sstevel@tonic-gate 		for (tconnp = connfp->connf_head; tconnp != NULL;
12250Sstevel@tonic-gate 		    tconnp = tconnp->conn_next) {
12260Sstevel@tonic-gate 			if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_ulp,
12270Sstevel@tonic-gate 			    connp->conn_remv6, connp->conn_srcv6,
12280Sstevel@tonic-gate 			    connp->conn_ports) &&
12290Sstevel@tonic-gate 			    (tconnp->conn_tcp->tcp_bound_if == 0 ||
12300Sstevel@tonic-gate 			    tconnp->conn_tcp->tcp_bound_if == ifindex)) {
12310Sstevel@tonic-gate 				/* Already have a conn. bail out */
12320Sstevel@tonic-gate 				mutex_exit(&connfp->connf_lock);
12330Sstevel@tonic-gate 				return (EADDRINUSE);
12340Sstevel@tonic-gate 			}
12350Sstevel@tonic-gate 		}
12360Sstevel@tonic-gate 		if (connp->conn_fanout != NULL) {
12370Sstevel@tonic-gate 			/*
12380Sstevel@tonic-gate 			 * Probably a XTI/TLI application trying to do a
12390Sstevel@tonic-gate 			 * rebind. Let it happen.
12400Sstevel@tonic-gate 			 */
12410Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
12420Sstevel@tonic-gate 			IPCL_HASH_REMOVE(connp);
12430Sstevel@tonic-gate 			mutex_enter(&connfp->connf_lock);
12440Sstevel@tonic-gate 		}
12450Sstevel@tonic-gate 		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
12460Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
12470Sstevel@tonic-gate 		break;
12480Sstevel@tonic-gate 
12490Sstevel@tonic-gate 	case IPPROTO_SCTP:
1250409Skcpoon 		IPCL_HASH_REMOVE(connp);
1251409Skcpoon 		lport = htons((uint16_t)(ntohl(ports) & 0xFFFF));
12520Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
12530Sstevel@tonic-gate 		break;
12540Sstevel@tonic-gate 
1255*1676Sjpk 	default:
1256*1676Sjpk 		if (is_system_labeled() && check_exempt_conflict_v6(connp))
1257*1676Sjpk 			return (EADDRINUSE);
1258*1676Sjpk 		/* FALLTHROUGH */
12590Sstevel@tonic-gate 	case IPPROTO_UDP:
12600Sstevel@tonic-gate 		up = (uint16_t *)&ports;
12610Sstevel@tonic-gate 		IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports);
12620Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
12630Sstevel@tonic-gate 			connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(up[1])];
12640Sstevel@tonic-gate 		} else {
12650Sstevel@tonic-gate 			connfp = &ipcl_proto_fanout_v6[protocol];
12660Sstevel@tonic-gate 		}
12670Sstevel@tonic-gate 
12680Sstevel@tonic-gate 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) {
12690Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
12700Sstevel@tonic-gate 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
12710Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
12720Sstevel@tonic-gate 		} else {
12730Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
12740Sstevel@tonic-gate 		}
12750Sstevel@tonic-gate 		break;
12760Sstevel@tonic-gate 	}
12770Sstevel@tonic-gate 
12780Sstevel@tonic-gate 	return (ret);
12790Sstevel@tonic-gate }
12800Sstevel@tonic-gate 
12810Sstevel@tonic-gate /*
12820Sstevel@tonic-gate  * v4 packet classifying function. looks up the fanout table to
12830Sstevel@tonic-gate  * find the conn, the packet belongs to. returns the conn with
12840Sstevel@tonic-gate  * the reference held, null otherwise.
1285*1676Sjpk  *
1286*1676Sjpk  * If zoneid is ALL_ZONES, then the search rules described in the "Connection
1287*1676Sjpk  * Lookup" comment block are applied.  Labels are also checked as described
1288*1676Sjpk  * above.  If the packet is from the inside (looped back), and is from the same
1289*1676Sjpk  * zone, then label checks are omitted.
12900Sstevel@tonic-gate  */
12910Sstevel@tonic-gate conn_t *
12920Sstevel@tonic-gate ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid)
12930Sstevel@tonic-gate {
12940Sstevel@tonic-gate 	ipha_t	*ipha;
12950Sstevel@tonic-gate 	connf_t	*connfp, *bind_connfp;
12960Sstevel@tonic-gate 	uint16_t lport;
12970Sstevel@tonic-gate 	uint16_t fport;
12980Sstevel@tonic-gate 	uint32_t ports;
12990Sstevel@tonic-gate 	conn_t	*connp;
13000Sstevel@tonic-gate 	uint16_t  *up;
1301*1676Sjpk 	boolean_t shared_addr;
1302*1676Sjpk 	boolean_t unlabeled;
13030Sstevel@tonic-gate 
13040Sstevel@tonic-gate 	ipha = (ipha_t *)mp->b_rptr;
13050Sstevel@tonic-gate 	up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET);
13060Sstevel@tonic-gate 
13070Sstevel@tonic-gate 	switch (protocol) {
13080Sstevel@tonic-gate 	case IPPROTO_TCP:
13090Sstevel@tonic-gate 		ports = *(uint32_t *)up;
13100Sstevel@tonic-gate 		connfp =
13110Sstevel@tonic-gate 		    &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src, ports)];
13120Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
13130Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
13140Sstevel@tonic-gate 		    connp = connp->conn_next) {
13150Sstevel@tonic-gate 			if (IPCL_CONN_MATCH(connp, protocol,
13160Sstevel@tonic-gate 			    ipha->ipha_src, ipha->ipha_dst, ports))
13170Sstevel@tonic-gate 				break;
13180Sstevel@tonic-gate 		}
13190Sstevel@tonic-gate 
13200Sstevel@tonic-gate 		if (connp != NULL) {
1321*1676Sjpk 			/*
1322*1676Sjpk 			 * We have a fully-bound TCP connection.
1323*1676Sjpk 			 *
1324*1676Sjpk 			 * For labeled systems, there's no need to check the
1325*1676Sjpk 			 * label here.  It's known to be good as we checked
1326*1676Sjpk 			 * before allowing the connection to become bound.
1327*1676Sjpk 			 */
13280Sstevel@tonic-gate 			CONN_INC_REF(connp);
13290Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
13300Sstevel@tonic-gate 			return (connp);
13310Sstevel@tonic-gate 		}
13320Sstevel@tonic-gate 
13330Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
13340Sstevel@tonic-gate 
13350Sstevel@tonic-gate 		lport = up[1];
1336*1676Sjpk 		unlabeled = B_FALSE;
1337*1676Sjpk 		/* Cred cannot be null on IPv4 */
1338*1676Sjpk 		if (is_system_labeled())
1339*1676Sjpk 			unlabeled = (crgetlabel(DB_CRED(mp))->tsl_flags &
1340*1676Sjpk 			    TSLF_UNLABELED) != 0;
1341*1676Sjpk 		shared_addr = (zoneid == ALL_ZONES);
1342*1676Sjpk 		if (shared_addr) {
1343*1676Sjpk 			zoneid = tsol_mlp_findzone(protocol, lport);
1344*1676Sjpk 			/*
1345*1676Sjpk 			 * If no shared MLP is found, tsol_mlp_findzone returns
1346*1676Sjpk 			 * ALL_ZONES.  In that case, we assume it's SLP, and
1347*1676Sjpk 			 * search for the zone based on the packet label.
1348*1676Sjpk 			 *
1349*1676Sjpk 			 * If there is such a zone, we prefer to find a
1350*1676Sjpk 			 * connection in it.  Otherwise, we look for a
1351*1676Sjpk 			 * MAC-exempt connection in any zone whose label
1352*1676Sjpk 			 * dominates the default label on the packet.
1353*1676Sjpk 			 */
1354*1676Sjpk 			if (zoneid == ALL_ZONES)
1355*1676Sjpk 				zoneid = tsol_packet_to_zoneid(mp);
1356*1676Sjpk 			else
1357*1676Sjpk 				unlabeled = B_FALSE;
1358*1676Sjpk 		}
1359*1676Sjpk 
13600Sstevel@tonic-gate 		bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)];
13610Sstevel@tonic-gate 		mutex_enter(&bind_connfp->connf_lock);
13620Sstevel@tonic-gate 		for (connp = bind_connfp->connf_head; connp != NULL;
13630Sstevel@tonic-gate 		    connp = connp->conn_next) {
1364*1676Sjpk 			if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
1365*1676Sjpk 			    lport) &&
1366*1676Sjpk 			    (connp->conn_zoneid == zoneid ||
1367*1676Sjpk 			    (unlabeled && connp->conn_mac_exempt)))
13680Sstevel@tonic-gate 				break;
13690Sstevel@tonic-gate 		}
13700Sstevel@tonic-gate 
1371*1676Sjpk 		/*
1372*1676Sjpk 		 * If the matching connection is SLP on a private address, then
1373*1676Sjpk 		 * the label on the packet must match the local zone's label.
1374*1676Sjpk 		 * Otherwise, it must be in the label range defined by tnrh.
1375*1676Sjpk 		 * This is ensured by tsol_receive_label.
1376*1676Sjpk 		 */
1377*1676Sjpk 		if (connp != NULL && is_system_labeled() &&
1378*1676Sjpk 		    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1379*1676Sjpk 		    shared_addr, connp)) {
1380*1676Sjpk 				DTRACE_PROBE3(
1381*1676Sjpk 				    tx__ip__log__info__classify__tcp,
1382*1676Sjpk 				    char *,
1383*1676Sjpk 				    "connp(1) could not receive mp(2)",
1384*1676Sjpk 				    conn_t *, connp, mblk_t *, mp);
1385*1676Sjpk 			connp = NULL;
1386*1676Sjpk 		}
1387*1676Sjpk 
13880Sstevel@tonic-gate 		if (connp != NULL) {
1389*1676Sjpk 			/* Have a listener at least */
13900Sstevel@tonic-gate 			CONN_INC_REF(connp);
13910Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
13920Sstevel@tonic-gate 			return (connp);
13930Sstevel@tonic-gate 		}
13940Sstevel@tonic-gate 
13950Sstevel@tonic-gate 		mutex_exit(&bind_connfp->connf_lock);
13960Sstevel@tonic-gate 
13970Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512,
13980Sstevel@tonic-gate 		    ("ipcl_classify: couldn't classify mp = %p\n",
13990Sstevel@tonic-gate 		    (void *)mp));
14000Sstevel@tonic-gate 		break;
14010Sstevel@tonic-gate 
14020Sstevel@tonic-gate 	case IPPROTO_UDP:
14030Sstevel@tonic-gate 		lport = up[1];
1404*1676Sjpk 		unlabeled = B_FALSE;
1405*1676Sjpk 		/* Cred cannot be null on IPv4 */
1406*1676Sjpk 		if (is_system_labeled())
1407*1676Sjpk 			unlabeled = (crgetlabel(DB_CRED(mp))->tsl_flags &
1408*1676Sjpk 			    TSLF_UNLABELED) != 0;
1409*1676Sjpk 		shared_addr = (zoneid == ALL_ZONES);
1410*1676Sjpk 		if (shared_addr) {
1411*1676Sjpk 			zoneid = tsol_mlp_findzone(protocol, lport);
1412*1676Sjpk 			/*
1413*1676Sjpk 			 * If no shared MLP is found, tsol_mlp_findzone returns
1414*1676Sjpk 			 * ALL_ZONES.  In that case, we assume it's SLP, and
1415*1676Sjpk 			 * search for the zone based on the packet label.
1416*1676Sjpk 			 *
1417*1676Sjpk 			 * If there is such a zone, we prefer to find a
1418*1676Sjpk 			 * connection in it.  Otherwise, we look for a
1419*1676Sjpk 			 * MAC-exempt connection in any zone whose label
1420*1676Sjpk 			 * dominates the default label on the packet.
1421*1676Sjpk 			 */
1422*1676Sjpk 			if (zoneid == ALL_ZONES)
1423*1676Sjpk 				zoneid = tsol_packet_to_zoneid(mp);
1424*1676Sjpk 			else
1425*1676Sjpk 				unlabeled = B_FALSE;
1426*1676Sjpk 		}
14270Sstevel@tonic-gate 		fport = up[0];
14280Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512, ("ipcl_udp_classify %x %x", lport, fport));
14290Sstevel@tonic-gate 		connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)];
14300Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
14310Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
14320Sstevel@tonic-gate 		    connp = connp->conn_next) {
14330Sstevel@tonic-gate 			if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
14340Sstevel@tonic-gate 			    fport, ipha->ipha_src) &&
1435*1676Sjpk 			    (connp->conn_zoneid == zoneid ||
1436*1676Sjpk 			    (unlabeled && connp->conn_mac_exempt)))
14370Sstevel@tonic-gate 				break;
14380Sstevel@tonic-gate 		}
14390Sstevel@tonic-gate 
1440*1676Sjpk 		if (connp != NULL && is_system_labeled() &&
1441*1676Sjpk 		    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1442*1676Sjpk 		    shared_addr, connp)) {
1443*1676Sjpk 			DTRACE_PROBE3(tx__ip__log__info__classify__udp,
1444*1676Sjpk 			    char *, "connp(1) could not receive mp(2)",
1445*1676Sjpk 			    conn_t *, connp, mblk_t *, mp);
1446*1676Sjpk 			connp = NULL;
1447*1676Sjpk 		}
1448*1676Sjpk 
14490Sstevel@tonic-gate 		if (connp != NULL) {
14500Sstevel@tonic-gate 			CONN_INC_REF(connp);
14510Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
14520Sstevel@tonic-gate 			return (connp);
14530Sstevel@tonic-gate 		}
14540Sstevel@tonic-gate 
14550Sstevel@tonic-gate 		/*
14560Sstevel@tonic-gate 		 * We shouldn't come here for multicast/broadcast packets
14570Sstevel@tonic-gate 		 */
14580Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
14590Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512,
14600Sstevel@tonic-gate 		    ("ipcl_classify: cant find udp conn_t for ports : %x %x",
14610Sstevel@tonic-gate 		    lport, fport));
14620Sstevel@tonic-gate 		break;
14630Sstevel@tonic-gate 	}
14640Sstevel@tonic-gate 
14650Sstevel@tonic-gate 	return (NULL);
14660Sstevel@tonic-gate }
14670Sstevel@tonic-gate 
14680Sstevel@tonic-gate conn_t *
14690Sstevel@tonic-gate ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid)
14700Sstevel@tonic-gate {
14710Sstevel@tonic-gate 	ip6_t		*ip6h;
14720Sstevel@tonic-gate 	connf_t		*connfp, *bind_connfp;
14730Sstevel@tonic-gate 	uint16_t	lport;
14740Sstevel@tonic-gate 	uint16_t	fport;
14750Sstevel@tonic-gate 	tcph_t		*tcph;
14760Sstevel@tonic-gate 	uint32_t	ports;
14770Sstevel@tonic-gate 	conn_t		*connp;
14780Sstevel@tonic-gate 	uint16_t	*up;
1479*1676Sjpk 	boolean_t	shared_addr;
1480*1676Sjpk 	boolean_t	unlabeled;
14810Sstevel@tonic-gate 
14820Sstevel@tonic-gate 	ip6h = (ip6_t *)mp->b_rptr;
14830Sstevel@tonic-gate 
14840Sstevel@tonic-gate 	switch (protocol) {
14850Sstevel@tonic-gate 	case IPPROTO_TCP:
14860Sstevel@tonic-gate 		tcph = (tcph_t *)&mp->b_rptr[hdr_len];
14870Sstevel@tonic-gate 		up = (uint16_t *)tcph->th_lport;
14880Sstevel@tonic-gate 		ports = *(uint32_t *)up;
14890Sstevel@tonic-gate 
14900Sstevel@tonic-gate 		connfp =
14910Sstevel@tonic-gate 		    &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src, ports)];
14920Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
14930Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
14940Sstevel@tonic-gate 		    connp = connp->conn_next) {
14950Sstevel@tonic-gate 			if (IPCL_CONN_MATCH_V6(connp, protocol,
14960Sstevel@tonic-gate 			    ip6h->ip6_src, ip6h->ip6_dst, ports))
14970Sstevel@tonic-gate 				break;
14980Sstevel@tonic-gate 		}
14990Sstevel@tonic-gate 
15000Sstevel@tonic-gate 		if (connp != NULL) {
1501*1676Sjpk 			/*
1502*1676Sjpk 			 * We have a fully-bound TCP connection.
1503*1676Sjpk 			 *
1504*1676Sjpk 			 * For labeled systems, there's no need to check the
1505*1676Sjpk 			 * label here.  It's known to be good as we checked
1506*1676Sjpk 			 * before allowing the connection to become bound.
1507*1676Sjpk 			 */
15080Sstevel@tonic-gate 			CONN_INC_REF(connp);
15090Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
15100Sstevel@tonic-gate 			return (connp);
15110Sstevel@tonic-gate 		}
15120Sstevel@tonic-gate 
15130Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
15140Sstevel@tonic-gate 
15150Sstevel@tonic-gate 		lport = up[1];
1516*1676Sjpk 		unlabeled = B_FALSE;
1517*1676Sjpk 		/* Cred can be null on IPv6 */
1518*1676Sjpk 		if (is_system_labeled()) {
1519*1676Sjpk 			cred_t *cr = DB_CRED(mp);
1520*1676Sjpk 
1521*1676Sjpk 			unlabeled = (cr != NULL &&
1522*1676Sjpk 			    crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0;
1523*1676Sjpk 		}
1524*1676Sjpk 		shared_addr = (zoneid == ALL_ZONES);
1525*1676Sjpk 		if (shared_addr) {
1526*1676Sjpk 			zoneid = tsol_mlp_findzone(protocol, lport);
1527*1676Sjpk 			/*
1528*1676Sjpk 			 * If no shared MLP is found, tsol_mlp_findzone returns
1529*1676Sjpk 			 * ALL_ZONES.  In that case, we assume it's SLP, and
1530*1676Sjpk 			 * search for the zone based on the packet label.
1531*1676Sjpk 			 *
1532*1676Sjpk 			 * If there is such a zone, we prefer to find a
1533*1676Sjpk 			 * connection in it.  Otherwise, we look for a
1534*1676Sjpk 			 * MAC-exempt connection in any zone whose label
1535*1676Sjpk 			 * dominates the default label on the packet.
1536*1676Sjpk 			 */
1537*1676Sjpk 			if (zoneid == ALL_ZONES)
1538*1676Sjpk 				zoneid = tsol_packet_to_zoneid(mp);
1539*1676Sjpk 			else
1540*1676Sjpk 				unlabeled = B_FALSE;
1541*1676Sjpk 		}
1542*1676Sjpk 
15430Sstevel@tonic-gate 		bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)];
15440Sstevel@tonic-gate 		mutex_enter(&bind_connfp->connf_lock);
15450Sstevel@tonic-gate 		for (connp = bind_connfp->connf_head; connp != NULL;
15460Sstevel@tonic-gate 		    connp = connp->conn_next) {
15470Sstevel@tonic-gate 			if (IPCL_BIND_MATCH_V6(connp, protocol,
15480Sstevel@tonic-gate 			    ip6h->ip6_dst, lport) &&
1549*1676Sjpk 			    (connp->conn_zoneid == zoneid ||
1550*1676Sjpk 			    (unlabeled && connp->conn_mac_exempt)))
15510Sstevel@tonic-gate 				break;
15520Sstevel@tonic-gate 		}
15530Sstevel@tonic-gate 
1554*1676Sjpk 		if (connp != NULL && is_system_labeled() &&
1555*1676Sjpk 		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
1556*1676Sjpk 		    shared_addr, connp)) {
1557*1676Sjpk 			DTRACE_PROBE3(tx__ip__log__info__classify__tcp6,
1558*1676Sjpk 			    char *, "connp(1) could not receive mp(2)",
1559*1676Sjpk 			    conn_t *, connp, mblk_t *, mp);
1560*1676Sjpk 			connp = NULL;
1561*1676Sjpk 		}
1562*1676Sjpk 
15630Sstevel@tonic-gate 		if (connp != NULL) {
15640Sstevel@tonic-gate 			/* Have a listner at least */
15650Sstevel@tonic-gate 			CONN_INC_REF(connp);
15660Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
15670Sstevel@tonic-gate 			IPCL_DEBUG_LVL(512,
15680Sstevel@tonic-gate 			    ("ipcl_classify_v6: found listner "
15690Sstevel@tonic-gate 			    "connp = %p\n", (void *)connp));
15700Sstevel@tonic-gate 
15710Sstevel@tonic-gate 			return (connp);
15720Sstevel@tonic-gate 		}
15730Sstevel@tonic-gate 
15740Sstevel@tonic-gate 		mutex_exit(&bind_connfp->connf_lock);
15750Sstevel@tonic-gate 
15760Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512,
15770Sstevel@tonic-gate 		    ("ipcl_classify_v6: couldn't classify mp = %p\n",
15780Sstevel@tonic-gate 		    (void *)mp));
15790Sstevel@tonic-gate 		break;
15800Sstevel@tonic-gate 
15810Sstevel@tonic-gate 	case IPPROTO_UDP:
15820Sstevel@tonic-gate 		up = (uint16_t *)&mp->b_rptr[hdr_len];
15830Sstevel@tonic-gate 		lport = up[1];
1584*1676Sjpk 		unlabeled = B_FALSE;
1585*1676Sjpk 		/* Cred can be null on IPv6 */
1586*1676Sjpk 		if (is_system_labeled()) {
1587*1676Sjpk 			cred_t *cr = DB_CRED(mp);
1588*1676Sjpk 
1589*1676Sjpk 			unlabeled = (cr != NULL &&
1590*1676Sjpk 			    crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0;
1591*1676Sjpk 		}
1592*1676Sjpk 		shared_addr = (zoneid == ALL_ZONES);
1593*1676Sjpk 		if (shared_addr) {
1594*1676Sjpk 			zoneid = tsol_mlp_findzone(protocol, lport);
1595*1676Sjpk 			/*
1596*1676Sjpk 			 * If no shared MLP is found, tsol_mlp_findzone returns
1597*1676Sjpk 			 * ALL_ZONES.  In that case, we assume it's SLP, and
1598*1676Sjpk 			 * search for the zone based on the packet label.
1599*1676Sjpk 			 *
1600*1676Sjpk 			 * If there is such a zone, we prefer to find a
1601*1676Sjpk 			 * connection in it.  Otherwise, we look for a
1602*1676Sjpk 			 * MAC-exempt connection in any zone whose label
1603*1676Sjpk 			 * dominates the default label on the packet.
1604*1676Sjpk 			 */
1605*1676Sjpk 			if (zoneid == ALL_ZONES)
1606*1676Sjpk 				zoneid = tsol_packet_to_zoneid(mp);
1607*1676Sjpk 			else
1608*1676Sjpk 				unlabeled = B_FALSE;
1609*1676Sjpk 		}
1610*1676Sjpk 
16110Sstevel@tonic-gate 		fport = up[0];
16120Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512, ("ipcl_udp_classify_v6 %x %x", lport,
16130Sstevel@tonic-gate 		    fport));
16140Sstevel@tonic-gate 		connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)];
16150Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
16160Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
16170Sstevel@tonic-gate 		    connp = connp->conn_next) {
16180Sstevel@tonic-gate 			if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst,
16190Sstevel@tonic-gate 			    fport, ip6h->ip6_src) &&
1620*1676Sjpk 			    (connp->conn_zoneid == zoneid ||
1621*1676Sjpk 			    (unlabeled && connp->conn_mac_exempt)))
16220Sstevel@tonic-gate 				break;
16230Sstevel@tonic-gate 		}
16240Sstevel@tonic-gate 
1625*1676Sjpk 		if (connp != NULL && is_system_labeled() &&
1626*1676Sjpk 		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
1627*1676Sjpk 		    shared_addr, connp)) {
1628*1676Sjpk 			DTRACE_PROBE3(tx__ip__log__info__classify__udp6,
1629*1676Sjpk 			    char *, "connp(1) could not receive mp(2)",
1630*1676Sjpk 			    conn_t *, connp, mblk_t *, mp);
1631*1676Sjpk 			connp = NULL;
1632*1676Sjpk 		}
1633*1676Sjpk 
16340Sstevel@tonic-gate 		if (connp != NULL) {
16350Sstevel@tonic-gate 			CONN_INC_REF(connp);
16360Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
16370Sstevel@tonic-gate 			return (connp);
16380Sstevel@tonic-gate 		}
16390Sstevel@tonic-gate 
16400Sstevel@tonic-gate 		/*
16410Sstevel@tonic-gate 		 * We shouldn't come here for multicast/broadcast packets
16420Sstevel@tonic-gate 		 */
16430Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
16440Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512,
16450Sstevel@tonic-gate 		    ("ipcl_classify_v6: cant find udp conn_t for ports : %x %x",
16460Sstevel@tonic-gate 		    lport, fport));
16470Sstevel@tonic-gate 		break;
16480Sstevel@tonic-gate 	}
16490Sstevel@tonic-gate 
16500Sstevel@tonic-gate 	return (NULL);
16510Sstevel@tonic-gate }
16520Sstevel@tonic-gate 
16530Sstevel@tonic-gate /*
16540Sstevel@tonic-gate  * wrapper around ipcl_classify_(v4,v6) routines.
16550Sstevel@tonic-gate  */
16560Sstevel@tonic-gate conn_t *
16570Sstevel@tonic-gate ipcl_classify(mblk_t *mp, zoneid_t zoneid)
16580Sstevel@tonic-gate {
16590Sstevel@tonic-gate 	uint16_t	hdr_len;
16600Sstevel@tonic-gate 	ipha_t		*ipha;
16610Sstevel@tonic-gate 	uint8_t		*nexthdrp;
16620Sstevel@tonic-gate 
16630Sstevel@tonic-gate 	if (MBLKL(mp) < sizeof (ipha_t))
16640Sstevel@tonic-gate 		return (NULL);
16650Sstevel@tonic-gate 
16660Sstevel@tonic-gate 	switch (IPH_HDR_VERSION(mp->b_rptr)) {
16670Sstevel@tonic-gate 	case IPV4_VERSION:
16680Sstevel@tonic-gate 		ipha = (ipha_t *)mp->b_rptr;
16690Sstevel@tonic-gate 		hdr_len = IPH_HDR_LENGTH(ipha);
16700Sstevel@tonic-gate 		return (ipcl_classify_v4(mp, ipha->ipha_protocol, hdr_len,
16710Sstevel@tonic-gate 		    zoneid));
16720Sstevel@tonic-gate 	case IPV6_VERSION:
16730Sstevel@tonic-gate 		if (!ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr,
16740Sstevel@tonic-gate 		    &hdr_len, &nexthdrp))
16750Sstevel@tonic-gate 			return (NULL);
16760Sstevel@tonic-gate 
16770Sstevel@tonic-gate 		return (ipcl_classify_v6(mp, *nexthdrp, hdr_len, zoneid));
16780Sstevel@tonic-gate 	}
16790Sstevel@tonic-gate 
16800Sstevel@tonic-gate 	return (NULL);
16810Sstevel@tonic-gate }
16820Sstevel@tonic-gate 
16830Sstevel@tonic-gate conn_t *
1684*1676Sjpk ipcl_classify_raw(mblk_t *mp, uint8_t protocol, zoneid_t zoneid,
1685*1676Sjpk     uint32_t ports, ipha_t *hdr)
16860Sstevel@tonic-gate {
1687*1676Sjpk 	connf_t		*connfp;
16880Sstevel@tonic-gate 	conn_t		*connp;
16890Sstevel@tonic-gate 	in_port_t	lport;
16900Sstevel@tonic-gate 	int		af;
1691*1676Sjpk 	boolean_t	shared_addr;
1692*1676Sjpk 	boolean_t	unlabeled;
1693*1676Sjpk 	const void	*dst;
16940Sstevel@tonic-gate 
16950Sstevel@tonic-gate 	lport = ((uint16_t *)&ports)[1];
1696*1676Sjpk 
1697*1676Sjpk 	unlabeled = B_FALSE;
1698*1676Sjpk 	/* Cred can be null on IPv6 */
1699*1676Sjpk 	if (is_system_labeled()) {
1700*1676Sjpk 		cred_t *cr = DB_CRED(mp);
1701*1676Sjpk 
1702*1676Sjpk 		unlabeled = (cr != NULL &&
1703*1676Sjpk 		    crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0;
1704*1676Sjpk 	}
1705*1676Sjpk 	shared_addr = (zoneid == ALL_ZONES);
1706*1676Sjpk 	if (shared_addr) {
1707*1676Sjpk 		zoneid = tsol_mlp_findzone(protocol, lport);
1708*1676Sjpk 		/*
1709*1676Sjpk 		 * If no shared MLP is found, tsol_mlp_findzone returns
1710*1676Sjpk 		 * ALL_ZONES.  In that case, we assume it's SLP, and search for
1711*1676Sjpk 		 * the zone based on the packet label.
1712*1676Sjpk 		 *
1713*1676Sjpk 		 * If there is such a zone, we prefer to find a connection in
1714*1676Sjpk 		 * it.  Otherwise, we look for a MAC-exempt connection in any
1715*1676Sjpk 		 * zone whose label dominates the default label on the packet.
1716*1676Sjpk 		 */
1717*1676Sjpk 		if (zoneid == ALL_ZONES)
1718*1676Sjpk 			zoneid = tsol_packet_to_zoneid(mp);
1719*1676Sjpk 		else
1720*1676Sjpk 			unlabeled = B_FALSE;
1721*1676Sjpk 	}
1722*1676Sjpk 
17230Sstevel@tonic-gate 	af = IPH_HDR_VERSION(hdr);
1724*1676Sjpk 	dst = af == IPV4_VERSION ? (const void *)&hdr->ipha_dst :
1725*1676Sjpk 	    (const void *)&((ip6_t *)hdr)->ip6_dst;
17260Sstevel@tonic-gate 	connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport))];
17270Sstevel@tonic-gate 
17280Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
17290Sstevel@tonic-gate 	for (connp = connfp->connf_head; connp != NULL;
17300Sstevel@tonic-gate 	    connp = connp->conn_next) {
17310Sstevel@tonic-gate 		/* We don't allow v4 fallback for v6 raw socket. */
1732*1676Sjpk 		if (af == (connp->conn_af_isv6 ? IPV4_VERSION :
1733*1676Sjpk 		    IPV6_VERSION))
17340Sstevel@tonic-gate 			continue;
17350Sstevel@tonic-gate 		if (connp->conn_fully_bound) {
17360Sstevel@tonic-gate 			if (af == IPV4_VERSION) {
1737*1676Sjpk 				if (!IPCL_CONN_MATCH(connp, protocol,
1738*1676Sjpk 				    hdr->ipha_src, hdr->ipha_dst, ports))
1739*1676Sjpk 					continue;
17400Sstevel@tonic-gate 			} else {
1741*1676Sjpk 				if (!IPCL_CONN_MATCH_V6(connp, protocol,
17420Sstevel@tonic-gate 				    ((ip6_t *)hdr)->ip6_src,
1743*1676Sjpk 				    ((ip6_t *)hdr)->ip6_dst, ports))
1744*1676Sjpk 					continue;
17450Sstevel@tonic-gate 			}
17460Sstevel@tonic-gate 		} else {
17470Sstevel@tonic-gate 			if (af == IPV4_VERSION) {
1748*1676Sjpk 				if (!IPCL_BIND_MATCH(connp, protocol,
1749*1676Sjpk 				    hdr->ipha_dst, lport))
1750*1676Sjpk 					continue;
17510Sstevel@tonic-gate 			} else {
1752*1676Sjpk 				if (!IPCL_BIND_MATCH_V6(connp, protocol,
1753*1676Sjpk 				    ((ip6_t *)hdr)->ip6_dst, lport))
1754*1676Sjpk 					continue;
17550Sstevel@tonic-gate 			}
17560Sstevel@tonic-gate 		}
1757*1676Sjpk 
1758*1676Sjpk 		if (connp->conn_zoneid == zoneid ||
1759*1676Sjpk 		    (unlabeled && connp->conn_mac_exempt))
1760*1676Sjpk 			break;
1761*1676Sjpk 	}
1762*1676Sjpk 	/*
1763*1676Sjpk 	 * If the connection is fully-bound and connection-oriented (TCP or
1764*1676Sjpk 	 * SCTP), then we've already validated the remote system's label.
1765*1676Sjpk 	 * There's no need to do it again for every packet.
1766*1676Sjpk 	 */
1767*1676Sjpk 	if (connp != NULL && is_system_labeled() && (!connp->conn_fully_bound ||
1768*1676Sjpk 	    !(connp->conn_flags & (IPCL_TCP|IPCL_SCTPCONN))) &&
1769*1676Sjpk 	    !tsol_receive_local(mp, dst, af, shared_addr, connp)) {
1770*1676Sjpk 		DTRACE_PROBE3(tx__ip__log__info__classify__rawip,
1771*1676Sjpk 		    char *, "connp(1) could not receive mp(2)",
1772*1676Sjpk 		    conn_t *, connp, mblk_t *, mp);
1773*1676Sjpk 		connp = NULL;
17740Sstevel@tonic-gate 	}
1775409Skcpoon 
1776409Skcpoon 	if (connp != NULL)
1777409Skcpoon 		goto found;
1778409Skcpoon 	mutex_exit(&connfp->connf_lock);
1779409Skcpoon 
1780409Skcpoon 	/* Try to look for a wildcard match. */
1781409Skcpoon 	connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(0)];
1782409Skcpoon 	mutex_enter(&connfp->connf_lock);
1783409Skcpoon 	for (connp = connfp->connf_head; connp != NULL;
1784409Skcpoon 	    connp = connp->conn_next) {
1785409Skcpoon 		/* We don't allow v4 fallback for v6 raw socket. */
1786409Skcpoon 		if ((af == (connp->conn_af_isv6 ? IPV4_VERSION :
1787409Skcpoon 		    IPV6_VERSION)) || (connp->conn_zoneid != zoneid)) {
1788409Skcpoon 			continue;
1789409Skcpoon 		}
1790409Skcpoon 		if (af == IPV4_VERSION) {
1791409Skcpoon 			if (IPCL_RAW_MATCH(connp, protocol, hdr->ipha_dst))
1792409Skcpoon 				break;
1793409Skcpoon 		} else {
1794409Skcpoon 			if (IPCL_RAW_MATCH_V6(connp, protocol,
1795409Skcpoon 			    ((ip6_t *)hdr)->ip6_dst)) {
1796409Skcpoon 				break;
1797409Skcpoon 			}
1798409Skcpoon 		}
17990Sstevel@tonic-gate 	}
1800409Skcpoon 
1801409Skcpoon 	if (connp != NULL)
1802409Skcpoon 		goto found;
1803409Skcpoon 
18040Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
18050Sstevel@tonic-gate 	return (NULL);
1806409Skcpoon 
1807409Skcpoon found:
1808409Skcpoon 	ASSERT(connp != NULL);
1809409Skcpoon 	CONN_INC_REF(connp);
1810409Skcpoon 	mutex_exit(&connfp->connf_lock);
1811409Skcpoon 	return (connp);
18120Sstevel@tonic-gate }
18130Sstevel@tonic-gate 
18140Sstevel@tonic-gate /* ARGSUSED */
18150Sstevel@tonic-gate static int
18160Sstevel@tonic-gate ipcl_tcpconn_constructor(void *buf, void *cdrarg, int kmflags)
18170Sstevel@tonic-gate {
18180Sstevel@tonic-gate 	itc_t	*itc = (itc_t *)buf;
18190Sstevel@tonic-gate 	conn_t 	*connp = &itc->itc_conn;
18200Sstevel@tonic-gate 	tcp_t	*tcp = &itc->itc_tcp;
18210Sstevel@tonic-gate 	bzero(itc, sizeof (itc_t));
18220Sstevel@tonic-gate 	tcp->tcp_timercache = tcp_timermp_alloc(KM_NOSLEEP);
18230Sstevel@tonic-gate 	connp->conn_tcp = tcp;
18240Sstevel@tonic-gate 	connp->conn_flags = IPCL_TCPCONN;
18250Sstevel@tonic-gate 	connp->conn_ulp = IPPROTO_TCP;
18260Sstevel@tonic-gate 	tcp->tcp_connp = connp;
18270Sstevel@tonic-gate 	return (0);
18280Sstevel@tonic-gate }
18290Sstevel@tonic-gate 
18300Sstevel@tonic-gate /* ARGSUSED */
18310Sstevel@tonic-gate static void
18320Sstevel@tonic-gate ipcl_tcpconn_destructor(void *buf, void *cdrarg)
18330Sstevel@tonic-gate {
18340Sstevel@tonic-gate 	tcp_timermp_free(((conn_t *)buf)->conn_tcp);
18350Sstevel@tonic-gate }
18360Sstevel@tonic-gate 
18370Sstevel@tonic-gate /*
18380Sstevel@tonic-gate  * All conns are inserted in a global multi-list for the benefit of
18390Sstevel@tonic-gate  * walkers. The walk is guaranteed to walk all open conns at the time
18400Sstevel@tonic-gate  * of the start of the walk exactly once. This property is needed to
18410Sstevel@tonic-gate  * achieve some cleanups during unplumb of interfaces. This is achieved
18420Sstevel@tonic-gate  * as follows.
18430Sstevel@tonic-gate  *
18440Sstevel@tonic-gate  * ipcl_conn_create and ipcl_conn_destroy are the only functions that
18450Sstevel@tonic-gate  * call the insert and delete functions below at creation and deletion
18460Sstevel@tonic-gate  * time respectively. The conn never moves or changes its position in this
18470Sstevel@tonic-gate  * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt
18480Sstevel@tonic-gate  * won't increase due to walkers, once the conn deletion has started. Note
18490Sstevel@tonic-gate  * that we can't remove the conn from the global list and then wait for
18500Sstevel@tonic-gate  * the refcnt to drop to zero, since walkers would then see a truncated
18510Sstevel@tonic-gate  * list. CONN_INCIPIENT ensures that walkers don't start looking at
18520Sstevel@tonic-gate  * conns until ip_open is ready to make them globally visible.
18530Sstevel@tonic-gate  * The global round robin multi-list locks are held only to get the
18540Sstevel@tonic-gate  * next member/insertion/deletion and contention should be negligible
18550Sstevel@tonic-gate  * if the multi-list is much greater than the number of cpus.
18560Sstevel@tonic-gate  */
18570Sstevel@tonic-gate void
18580Sstevel@tonic-gate ipcl_globalhash_insert(conn_t *connp)
18590Sstevel@tonic-gate {
18600Sstevel@tonic-gate 	int	index;
18610Sstevel@tonic-gate 
18620Sstevel@tonic-gate 	/*
18630Sstevel@tonic-gate 	 * No need for atomic here. Approximate even distribution
18640Sstevel@tonic-gate 	 * in the global lists is sufficient.
18650Sstevel@tonic-gate 	 */
18660Sstevel@tonic-gate 	conn_g_index++;
18670Sstevel@tonic-gate 	index = conn_g_index & (CONN_G_HASH_SIZE - 1);
18680Sstevel@tonic-gate 
18690Sstevel@tonic-gate 	connp->conn_g_prev = NULL;
18700Sstevel@tonic-gate 	/*
18710Sstevel@tonic-gate 	 * Mark as INCIPIENT, so that walkers will ignore this
18720Sstevel@tonic-gate 	 * for now, till ip_open is ready to make it visible globally.
18730Sstevel@tonic-gate 	 */
18740Sstevel@tonic-gate 	connp->conn_state_flags |= CONN_INCIPIENT;
18750Sstevel@tonic-gate 
18760Sstevel@tonic-gate 	/* Insert at the head of the list */
18770Sstevel@tonic-gate 	mutex_enter(&ipcl_globalhash_fanout[index].connf_lock);
18780Sstevel@tonic-gate 	connp->conn_g_next = ipcl_globalhash_fanout[index].connf_head;
18790Sstevel@tonic-gate 	if (connp->conn_g_next != NULL)
18800Sstevel@tonic-gate 		connp->conn_g_next->conn_g_prev = connp;
18810Sstevel@tonic-gate 	ipcl_globalhash_fanout[index].connf_head = connp;
18820Sstevel@tonic-gate 
18830Sstevel@tonic-gate 	/* The fanout bucket this conn points to */
18840Sstevel@tonic-gate 	connp->conn_g_fanout = &ipcl_globalhash_fanout[index];
18850Sstevel@tonic-gate 
18860Sstevel@tonic-gate 	mutex_exit(&ipcl_globalhash_fanout[index].connf_lock);
18870Sstevel@tonic-gate }
18880Sstevel@tonic-gate 
18890Sstevel@tonic-gate void
18900Sstevel@tonic-gate ipcl_globalhash_remove(conn_t *connp)
18910Sstevel@tonic-gate {
18920Sstevel@tonic-gate 	/*
18930Sstevel@tonic-gate 	 * We were never inserted in the global multi list.
18940Sstevel@tonic-gate 	 * IPCL_NONE variety is never inserted in the global multilist
18950Sstevel@tonic-gate 	 * since it is presumed to not need any cleanup and is transient.
18960Sstevel@tonic-gate 	 */
18970Sstevel@tonic-gate 	if (connp->conn_g_fanout == NULL)
18980Sstevel@tonic-gate 		return;
18990Sstevel@tonic-gate 
19000Sstevel@tonic-gate 	mutex_enter(&connp->conn_g_fanout->connf_lock);
19010Sstevel@tonic-gate 	if (connp->conn_g_prev != NULL)
19020Sstevel@tonic-gate 		connp->conn_g_prev->conn_g_next = connp->conn_g_next;
19030Sstevel@tonic-gate 	else
19040Sstevel@tonic-gate 		connp->conn_g_fanout->connf_head = connp->conn_g_next;
19050Sstevel@tonic-gate 	if (connp->conn_g_next != NULL)
19060Sstevel@tonic-gate 		connp->conn_g_next->conn_g_prev = connp->conn_g_prev;
19070Sstevel@tonic-gate 	mutex_exit(&connp->conn_g_fanout->connf_lock);
19080Sstevel@tonic-gate 
19090Sstevel@tonic-gate 	/* Better to stumble on a null pointer than to corrupt memory */
19100Sstevel@tonic-gate 	connp->conn_g_next = NULL;
19110Sstevel@tonic-gate 	connp->conn_g_prev = NULL;
19120Sstevel@tonic-gate }
19130Sstevel@tonic-gate 
19140Sstevel@tonic-gate /*
19150Sstevel@tonic-gate  * Walk the list of all conn_t's in the system, calling the function provided
19160Sstevel@tonic-gate  * with the specified argument for each.
19170Sstevel@tonic-gate  * Applies to both IPv4 and IPv6.
19180Sstevel@tonic-gate  *
19190Sstevel@tonic-gate  * IPCs may hold pointers to ipif/ill. To guard against stale pointers
19200Sstevel@tonic-gate  * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is
19210Sstevel@tonic-gate  * unplumbed or removed. New conn_t's that are created while we are walking
19220Sstevel@tonic-gate  * may be missed by this walk, because they are not necessarily inserted
19230Sstevel@tonic-gate  * at the tail of the list. They are new conn_t's and thus don't have any
19240Sstevel@tonic-gate  * stale pointers. The CONN_CLOSING flag ensures that no new reference
19250Sstevel@tonic-gate  * is created to the struct that is going away.
19260Sstevel@tonic-gate  */
19270Sstevel@tonic-gate void
19280Sstevel@tonic-gate ipcl_walk(pfv_t func, void *arg)
19290Sstevel@tonic-gate {
19300Sstevel@tonic-gate 	int	i;
19310Sstevel@tonic-gate 	conn_t	*connp;
19320Sstevel@tonic-gate 	conn_t	*prev_connp;
19330Sstevel@tonic-gate 
19340Sstevel@tonic-gate 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
19350Sstevel@tonic-gate 		mutex_enter(&ipcl_globalhash_fanout[i].connf_lock);
19360Sstevel@tonic-gate 		prev_connp = NULL;
19370Sstevel@tonic-gate 		connp = ipcl_globalhash_fanout[i].connf_head;
19380Sstevel@tonic-gate 		while (connp != NULL) {
19390Sstevel@tonic-gate 			mutex_enter(&connp->conn_lock);
19400Sstevel@tonic-gate 			if (connp->conn_state_flags &
19410Sstevel@tonic-gate 			    (CONN_CONDEMNED | CONN_INCIPIENT)) {
19420Sstevel@tonic-gate 				mutex_exit(&connp->conn_lock);
19430Sstevel@tonic-gate 				connp = connp->conn_g_next;
19440Sstevel@tonic-gate 				continue;
19450Sstevel@tonic-gate 			}
19460Sstevel@tonic-gate 			CONN_INC_REF_LOCKED(connp);
19470Sstevel@tonic-gate 			mutex_exit(&connp->conn_lock);
19480Sstevel@tonic-gate 			mutex_exit(&ipcl_globalhash_fanout[i].connf_lock);
19490Sstevel@tonic-gate 			(*func)(connp, arg);
19500Sstevel@tonic-gate 			if (prev_connp != NULL)
19510Sstevel@tonic-gate 				CONN_DEC_REF(prev_connp);
19520Sstevel@tonic-gate 			mutex_enter(&ipcl_globalhash_fanout[i].connf_lock);
19530Sstevel@tonic-gate 			prev_connp = connp;
19540Sstevel@tonic-gate 			connp = connp->conn_g_next;
19550Sstevel@tonic-gate 		}
19560Sstevel@tonic-gate 		mutex_exit(&ipcl_globalhash_fanout[i].connf_lock);
19570Sstevel@tonic-gate 		if (prev_connp != NULL)
19580Sstevel@tonic-gate 			CONN_DEC_REF(prev_connp);
19590Sstevel@tonic-gate 	}
19600Sstevel@tonic-gate }
19610Sstevel@tonic-gate 
19620Sstevel@tonic-gate /*
19630Sstevel@tonic-gate  * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on
19640Sstevel@tonic-gate  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
19650Sstevel@tonic-gate  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
19660Sstevel@tonic-gate  * (peer tcp in at least ESTABLISHED state).
19670Sstevel@tonic-gate  */
19680Sstevel@tonic-gate conn_t *
19690Sstevel@tonic-gate ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcph_t *tcph)
19700Sstevel@tonic-gate {
19710Sstevel@tonic-gate 	uint32_t ports;
19720Sstevel@tonic-gate 	uint16_t *pports = (uint16_t *)&ports;
19730Sstevel@tonic-gate 	connf_t	*connfp;
19740Sstevel@tonic-gate 	conn_t	*tconnp;
19750Sstevel@tonic-gate 	boolean_t zone_chk;
19760Sstevel@tonic-gate 
19770Sstevel@tonic-gate 	/*
19780Sstevel@tonic-gate 	 * If either the source of destination address is loopback, then
19790Sstevel@tonic-gate 	 * both endpoints must be in the same Zone.  Otherwise, both of
19800Sstevel@tonic-gate 	 * the addresses are system-wide unique (tcp is in ESTABLISHED
19810Sstevel@tonic-gate 	 * state) and the endpoints may reside in different Zones.
19820Sstevel@tonic-gate 	 */
19830Sstevel@tonic-gate 	zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) ||
19840Sstevel@tonic-gate 	    ipha->ipha_dst == htonl(INADDR_LOOPBACK));
19850Sstevel@tonic-gate 
19860Sstevel@tonic-gate 	bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t));
19870Sstevel@tonic-gate 	bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t));
19880Sstevel@tonic-gate 
19890Sstevel@tonic-gate 	connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, ports)];
19900Sstevel@tonic-gate 
19910Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
19920Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
19930Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
19940Sstevel@tonic-gate 
19950Sstevel@tonic-gate 		if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
19960Sstevel@tonic-gate 		    ipha->ipha_dst, ipha->ipha_src, ports) &&
19970Sstevel@tonic-gate 		    tconnp->conn_tcp->tcp_state >= TCPS_ESTABLISHED &&
19980Sstevel@tonic-gate 		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
19990Sstevel@tonic-gate 
20000Sstevel@tonic-gate 			ASSERT(tconnp != connp);
20010Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
20020Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
20030Sstevel@tonic-gate 			return (tconnp);
20040Sstevel@tonic-gate 		}
20050Sstevel@tonic-gate 	}
20060Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
20070Sstevel@tonic-gate 	return (NULL);
20080Sstevel@tonic-gate }
20090Sstevel@tonic-gate 
20100Sstevel@tonic-gate /*
20110Sstevel@tonic-gate  * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on
20120Sstevel@tonic-gate  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
20130Sstevel@tonic-gate  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
20140Sstevel@tonic-gate  * (peer tcp in at least ESTABLISHED state).
20150Sstevel@tonic-gate  */
20160Sstevel@tonic-gate conn_t *
20170Sstevel@tonic-gate ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcph_t *tcph)
20180Sstevel@tonic-gate {
20190Sstevel@tonic-gate 	uint32_t ports;
20200Sstevel@tonic-gate 	uint16_t *pports = (uint16_t *)&ports;
20210Sstevel@tonic-gate 	connf_t	*connfp;
20220Sstevel@tonic-gate 	conn_t	*tconnp;
20230Sstevel@tonic-gate 	boolean_t zone_chk;
20240Sstevel@tonic-gate 
20250Sstevel@tonic-gate 	/*
20260Sstevel@tonic-gate 	 * If either the source of destination address is loopback, then
20270Sstevel@tonic-gate 	 * both endpoints must be in the same Zone.  Otherwise, both of
20280Sstevel@tonic-gate 	 * the addresses are system-wide unique (tcp is in ESTABLISHED
20290Sstevel@tonic-gate 	 * state) and the endpoints may reside in different Zones.  We
20300Sstevel@tonic-gate 	 * don't do Zone check for link local address(es) because the
20310Sstevel@tonic-gate 	 * current Zone implementation treats each link local address as
20320Sstevel@tonic-gate 	 * being unique per system node, i.e. they belong to global Zone.
20330Sstevel@tonic-gate 	 */
20340Sstevel@tonic-gate 	zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) ||
20350Sstevel@tonic-gate 	    IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst));
20360Sstevel@tonic-gate 
20370Sstevel@tonic-gate 	bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t));
20380Sstevel@tonic-gate 	bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t));
20390Sstevel@tonic-gate 
20400Sstevel@tonic-gate 	connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, ports)];
20410Sstevel@tonic-gate 
20420Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
20430Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
20440Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
20450Sstevel@tonic-gate 
20460Sstevel@tonic-gate 		/* We skip tcp_bound_if check here as this is loopback tcp */
20470Sstevel@tonic-gate 		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
20480Sstevel@tonic-gate 		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
20490Sstevel@tonic-gate 		    tconnp->conn_tcp->tcp_state >= TCPS_ESTABLISHED &&
20500Sstevel@tonic-gate 		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
20510Sstevel@tonic-gate 
20520Sstevel@tonic-gate 			ASSERT(tconnp != connp);
20530Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
20540Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
20550Sstevel@tonic-gate 			return (tconnp);
20560Sstevel@tonic-gate 		}
20570Sstevel@tonic-gate 	}
20580Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
20590Sstevel@tonic-gate 	return (NULL);
20600Sstevel@tonic-gate }
20610Sstevel@tonic-gate 
20620Sstevel@tonic-gate /*
20630Sstevel@tonic-gate  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
20640Sstevel@tonic-gate  * Returns with conn reference held. Caller must call CONN_DEC_REF.
20650Sstevel@tonic-gate  * Only checks for connected entries i.e. no INADDR_ANY checks.
20660Sstevel@tonic-gate  */
20670Sstevel@tonic-gate conn_t *
20680Sstevel@tonic-gate ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcph_t *tcph, int min_state)
20690Sstevel@tonic-gate {
20700Sstevel@tonic-gate 	uint32_t ports;
20710Sstevel@tonic-gate 	uint16_t *pports;
20720Sstevel@tonic-gate 	connf_t	*connfp;
20730Sstevel@tonic-gate 	conn_t	*tconnp;
20740Sstevel@tonic-gate 
20750Sstevel@tonic-gate 	pports = (uint16_t *)&ports;
20760Sstevel@tonic-gate 	bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t));
20770Sstevel@tonic-gate 	bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t));
20780Sstevel@tonic-gate 
20790Sstevel@tonic-gate 	connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, ports)];
20800Sstevel@tonic-gate 
20810Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
20820Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
20830Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
20840Sstevel@tonic-gate 
20850Sstevel@tonic-gate 		if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
20860Sstevel@tonic-gate 		    ipha->ipha_dst, ipha->ipha_src, ports) &&
20870Sstevel@tonic-gate 		    tconnp->conn_tcp->tcp_state >= min_state) {
20880Sstevel@tonic-gate 
20890Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
20900Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
20910Sstevel@tonic-gate 			return (tconnp);
20920Sstevel@tonic-gate 		}
20930Sstevel@tonic-gate 	}
20940Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
20950Sstevel@tonic-gate 	return (NULL);
20960Sstevel@tonic-gate }
20970Sstevel@tonic-gate 
20980Sstevel@tonic-gate /*
20990Sstevel@tonic-gate  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
21000Sstevel@tonic-gate  * Returns with conn reference held. Caller must call CONN_DEC_REF.
21010Sstevel@tonic-gate  * Only checks for connected entries i.e. no INADDR_ANY checks.
21020Sstevel@tonic-gate  * Match on ifindex in addition to addresses.
21030Sstevel@tonic-gate  */
21040Sstevel@tonic-gate conn_t *
21050Sstevel@tonic-gate ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state,
21060Sstevel@tonic-gate     uint_t ifindex)
21070Sstevel@tonic-gate {
21080Sstevel@tonic-gate 	tcp_t	*tcp;
21090Sstevel@tonic-gate 	uint32_t ports;
21100Sstevel@tonic-gate 	uint16_t *pports;
21110Sstevel@tonic-gate 	connf_t	*connfp;
21120Sstevel@tonic-gate 	conn_t	*tconnp;
21130Sstevel@tonic-gate 
21140Sstevel@tonic-gate 	pports = (uint16_t *)&ports;
21150Sstevel@tonic-gate 	pports[0] = tcpha->tha_fport;
21160Sstevel@tonic-gate 	pports[1] = tcpha->tha_lport;
21170Sstevel@tonic-gate 
21180Sstevel@tonic-gate 	connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, ports)];
21190Sstevel@tonic-gate 
21200Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
21210Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
21220Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
21230Sstevel@tonic-gate 
21240Sstevel@tonic-gate 		tcp = tconnp->conn_tcp;
21250Sstevel@tonic-gate 		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
21260Sstevel@tonic-gate 		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
21270Sstevel@tonic-gate 		    tcp->tcp_state >= min_state &&
21280Sstevel@tonic-gate 		    (tcp->tcp_bound_if == 0 ||
21290Sstevel@tonic-gate 		    tcp->tcp_bound_if == ifindex)) {
21300Sstevel@tonic-gate 
21310Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
21320Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
21330Sstevel@tonic-gate 			return (tconnp);
21340Sstevel@tonic-gate 		}
21350Sstevel@tonic-gate 	}
21360Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
21370Sstevel@tonic-gate 	return (NULL);
21380Sstevel@tonic-gate }
21390Sstevel@tonic-gate 
21400Sstevel@tonic-gate /*
2141*1676Sjpk  * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate
2142*1676Sjpk  * a listener when changing state.
21430Sstevel@tonic-gate  */
21440Sstevel@tonic-gate conn_t *
21450Sstevel@tonic-gate ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid)
21460Sstevel@tonic-gate {
21470Sstevel@tonic-gate 	connf_t		*bind_connfp;
21480Sstevel@tonic-gate 	conn_t		*connp;
21490Sstevel@tonic-gate 	tcp_t		*tcp;
21500Sstevel@tonic-gate 
21510Sstevel@tonic-gate 	/*
21520Sstevel@tonic-gate 	 * Avoid false matches for packets sent to an IP destination of
21530Sstevel@tonic-gate 	 * all zeros.
21540Sstevel@tonic-gate 	 */
21550Sstevel@tonic-gate 	if (laddr == 0)
21560Sstevel@tonic-gate 		return (NULL);
21570Sstevel@tonic-gate 
2158*1676Sjpk 	ASSERT(zoneid != ALL_ZONES);
2159*1676Sjpk 
21600Sstevel@tonic-gate 	bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)];
21610Sstevel@tonic-gate 	mutex_enter(&bind_connfp->connf_lock);
21620Sstevel@tonic-gate 	for (connp = bind_connfp->connf_head; connp != NULL;
21630Sstevel@tonic-gate 	    connp = connp->conn_next) {
21640Sstevel@tonic-gate 		tcp = connp->conn_tcp;
21650Sstevel@tonic-gate 		if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) &&
21660Sstevel@tonic-gate 		    connp->conn_zoneid == zoneid &&
21670Sstevel@tonic-gate 		    (tcp->tcp_listener == NULL)) {
21680Sstevel@tonic-gate 			CONN_INC_REF(connp);
21690Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
21700Sstevel@tonic-gate 			return (connp);
21710Sstevel@tonic-gate 		}
21720Sstevel@tonic-gate 	}
21730Sstevel@tonic-gate 	mutex_exit(&bind_connfp->connf_lock);
21740Sstevel@tonic-gate 	return (NULL);
21750Sstevel@tonic-gate }
21760Sstevel@tonic-gate 
2177*1676Sjpk /*
2178*1676Sjpk  * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate
2179*1676Sjpk  * a listener when changing state.
2180*1676Sjpk  */
21810Sstevel@tonic-gate conn_t *
21820Sstevel@tonic-gate ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex,
21830Sstevel@tonic-gate     zoneid_t zoneid)
21840Sstevel@tonic-gate {
21850Sstevel@tonic-gate 	connf_t		*bind_connfp;
21860Sstevel@tonic-gate 	conn_t		*connp = NULL;
21870Sstevel@tonic-gate 	tcp_t		*tcp;
21880Sstevel@tonic-gate 
21890Sstevel@tonic-gate 	/*
21900Sstevel@tonic-gate 	 * Avoid false matches for packets sent to an IP destination of
21910Sstevel@tonic-gate 	 * all zeros.
21920Sstevel@tonic-gate 	 */
21930Sstevel@tonic-gate 	if (IN6_IS_ADDR_UNSPECIFIED(laddr))
21940Sstevel@tonic-gate 		return (NULL);
21950Sstevel@tonic-gate 
2196*1676Sjpk 	ASSERT(zoneid != ALL_ZONES);
21970Sstevel@tonic-gate 
21980Sstevel@tonic-gate 	bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)];
21990Sstevel@tonic-gate 	mutex_enter(&bind_connfp->connf_lock);
22000Sstevel@tonic-gate 	for (connp = bind_connfp->connf_head; connp != NULL;
22010Sstevel@tonic-gate 	    connp = connp->conn_next) {
22020Sstevel@tonic-gate 		tcp = connp->conn_tcp;
22030Sstevel@tonic-gate 		if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) &&
22040Sstevel@tonic-gate 		    connp->conn_zoneid == zoneid &&
22050Sstevel@tonic-gate 		    (tcp->tcp_bound_if == 0 ||
22060Sstevel@tonic-gate 		    tcp->tcp_bound_if == ifindex) &&
22070Sstevel@tonic-gate 		    tcp->tcp_listener == NULL) {
22080Sstevel@tonic-gate 			CONN_INC_REF(connp);
22090Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
22100Sstevel@tonic-gate 			return (connp);
22110Sstevel@tonic-gate 		}
22120Sstevel@tonic-gate 	}
22130Sstevel@tonic-gate 	mutex_exit(&bind_connfp->connf_lock);
22140Sstevel@tonic-gate 	return (NULL);
22150Sstevel@tonic-gate }
22160Sstevel@tonic-gate 
2217741Smasputra /*
2218741Smasputra  * ipcl_get_next_conn
2219741Smasputra  *	get the next entry in the conn global list
2220741Smasputra  *	and put a reference on the next_conn.
2221741Smasputra  *	decrement the reference on the current conn.
2222741Smasputra  *
2223741Smasputra  * This is an iterator based walker function that also provides for
2224741Smasputra  * some selection by the caller. It walks through the conn_hash bucket
2225741Smasputra  * searching for the next valid connp in the list, and selects connections
2226741Smasputra  * that are neither closed nor condemned. It also REFHOLDS the conn
2227741Smasputra  * thus ensuring that the conn exists when the caller uses the conn.
2228741Smasputra  */
2229741Smasputra conn_t *
2230741Smasputra ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags)
2231741Smasputra {
2232741Smasputra 	conn_t	*next_connp;
2233741Smasputra 
2234741Smasputra 	if (connfp == NULL)
2235741Smasputra 		return (NULL);
2236741Smasputra 
2237741Smasputra 	mutex_enter(&connfp->connf_lock);
2238741Smasputra 
2239741Smasputra 	next_connp = (connp == NULL) ?
2240741Smasputra 	    connfp->connf_head : connp->conn_g_next;
2241741Smasputra 
2242741Smasputra 	while (next_connp != NULL) {
2243741Smasputra 		mutex_enter(&next_connp->conn_lock);
2244741Smasputra 		if (!(next_connp->conn_flags & conn_flags) ||
2245741Smasputra 		    (next_connp->conn_state_flags &
2246741Smasputra 		    (CONN_CONDEMNED | CONN_INCIPIENT))) {
2247741Smasputra 			/*
2248741Smasputra 			 * This conn has been condemned or
2249741Smasputra 			 * is closing, or the flags don't match
2250741Smasputra 			 */
2251741Smasputra 			mutex_exit(&next_connp->conn_lock);
2252741Smasputra 			next_connp = next_connp->conn_g_next;
2253741Smasputra 			continue;
2254741Smasputra 		}
2255741Smasputra 		CONN_INC_REF_LOCKED(next_connp);
2256741Smasputra 		mutex_exit(&next_connp->conn_lock);
2257741Smasputra 		break;
2258741Smasputra 	}
2259741Smasputra 
2260741Smasputra 	mutex_exit(&connfp->connf_lock);
2261741Smasputra 
2262741Smasputra 	if (connp != NULL)
2263741Smasputra 		CONN_DEC_REF(connp);
2264741Smasputra 
2265741Smasputra 	return (next_connp);
2266741Smasputra }
2267741Smasputra 
22680Sstevel@tonic-gate #ifdef CONN_DEBUG
22690Sstevel@tonic-gate /*
22700Sstevel@tonic-gate  * Trace of the last NBUF refhold/refrele
22710Sstevel@tonic-gate  */
22720Sstevel@tonic-gate int
22730Sstevel@tonic-gate conn_trace_ref(conn_t *connp)
22740Sstevel@tonic-gate {
22750Sstevel@tonic-gate 	int	last;
22760Sstevel@tonic-gate 	conn_trace_t	*ctb;
22770Sstevel@tonic-gate 
22780Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connp->conn_lock));
22790Sstevel@tonic-gate 	last = connp->conn_trace_last;
22800Sstevel@tonic-gate 	last++;
22810Sstevel@tonic-gate 	if (last == CONN_TRACE_MAX)
22820Sstevel@tonic-gate 		last = 0;
22830Sstevel@tonic-gate 
22840Sstevel@tonic-gate 	ctb = &connp->conn_trace_buf[last];
22850Sstevel@tonic-gate 	ctb->ctb_depth = getpcstack(ctb->ctb_stack, IP_STACK_DEPTH);
22860Sstevel@tonic-gate 	connp->conn_trace_last = last;
22870Sstevel@tonic-gate 	return (1);
22880Sstevel@tonic-gate }
22890Sstevel@tonic-gate 
22900Sstevel@tonic-gate int
22910Sstevel@tonic-gate conn_untrace_ref(conn_t *connp)
22920Sstevel@tonic-gate {
22930Sstevel@tonic-gate 	int	last;
22940Sstevel@tonic-gate 	conn_trace_t	*ctb;
22950Sstevel@tonic-gate 
22960Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connp->conn_lock));
22970Sstevel@tonic-gate 	last = connp->conn_trace_last;
22980Sstevel@tonic-gate 	last++;
22990Sstevel@tonic-gate 	if (last == CONN_TRACE_MAX)
23000Sstevel@tonic-gate 		last = 0;
23010Sstevel@tonic-gate 
23020Sstevel@tonic-gate 	ctb = &connp->conn_trace_buf[last];
23030Sstevel@tonic-gate 	ctb->ctb_depth = getpcstack(ctb->ctb_stack, IP_STACK_DEPTH);
23040Sstevel@tonic-gate 	connp->conn_trace_last = last;
23050Sstevel@tonic-gate 	return (1);
23060Sstevel@tonic-gate }
23070Sstevel@tonic-gate #endif
2308