xref: /onnv-gate/usr/src/uts/common/inet/ip/ipclassifier.c (revision 153:b7f7b242faa2)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
50Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
60Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
70Sstevel@tonic-gate  * with the License.
80Sstevel@tonic-gate  *
90Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
100Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
110Sstevel@tonic-gate  * See the License for the specific language governing permissions
120Sstevel@tonic-gate  * and limitations under the License.
130Sstevel@tonic-gate  *
140Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
150Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
160Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
170Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
180Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
190Sstevel@tonic-gate  *
200Sstevel@tonic-gate  * CDDL HEADER END
210Sstevel@tonic-gate  */
220Sstevel@tonic-gate /*
23*153Sethindra  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
280Sstevel@tonic-gate 
290Sstevel@tonic-gate const char ipclassifier_version[] = "@(#)ipclassifier.c	1.6	04/03/31 SMI";
300Sstevel@tonic-gate 
310Sstevel@tonic-gate /*
320Sstevel@tonic-gate  * IP PACKET CLASSIFIER
330Sstevel@tonic-gate  *
340Sstevel@tonic-gate  * The IP packet classifier provides mapping between IP packets and persistent
350Sstevel@tonic-gate  * connection state for connection-oriented protocols. It also provides
360Sstevel@tonic-gate  * interface for managing connection states.
370Sstevel@tonic-gate  *
380Sstevel@tonic-gate  * The connection state is kept in conn_t data structure and contains, among
390Sstevel@tonic-gate  * other things:
400Sstevel@tonic-gate  *
410Sstevel@tonic-gate  *	o local/remote address and ports
420Sstevel@tonic-gate  *	o Transport protocol
430Sstevel@tonic-gate  *	o squeue for the connection (for TCP only)
440Sstevel@tonic-gate  *	o reference counter
450Sstevel@tonic-gate  *	o Connection state
460Sstevel@tonic-gate  *	o hash table linkage
470Sstevel@tonic-gate  *	o interface/ire information
480Sstevel@tonic-gate  *	o credentials
490Sstevel@tonic-gate  *	o ipsec policy
500Sstevel@tonic-gate  *	o send and receive functions.
510Sstevel@tonic-gate  *	o mutex lock.
520Sstevel@tonic-gate  *
530Sstevel@tonic-gate  * Connections use a reference counting scheme. They are freed when the
540Sstevel@tonic-gate  * reference counter drops to zero. A reference is incremented when connection
550Sstevel@tonic-gate  * is placed in a list or table, when incoming packet for the connection arrives
560Sstevel@tonic-gate  * and when connection is processed via squeue (squeue processing may be
570Sstevel@tonic-gate  * asynchronous and the reference protects the connection from being destroyed
580Sstevel@tonic-gate  * before its processing is finished).
590Sstevel@tonic-gate  *
600Sstevel@tonic-gate  * send and receive functions are currently used for TCP only. The send function
610Sstevel@tonic-gate  * determines the IP entry point for the packet once it leaves TCP to be sent to
620Sstevel@tonic-gate  * the destination address. The receive function is used by IP when the packet
630Sstevel@tonic-gate  * should be passed for TCP processing. When a new connection is created these
640Sstevel@tonic-gate  * are set to ip_output() and tcp_input() respectively. During the lifetime of
650Sstevel@tonic-gate  * the connection the send and receive functions may change depending on the
660Sstevel@tonic-gate  * changes in the connection state. For example, once the connection is bound to
670Sstevel@tonic-gate  * an address, the receive function for this connection is set to
680Sstevel@tonic-gate  * tcp_conn_request().  This allows incoming SYNs to go directly into the
690Sstevel@tonic-gate  * listener SYN processing function without going to tcp_input() first.
700Sstevel@tonic-gate  *
710Sstevel@tonic-gate  * Classifier uses several hash tables:
720Sstevel@tonic-gate  *
730Sstevel@tonic-gate  * 	ipcl_conn_fanout:	contains all TCP connections in CONNECTED state
740Sstevel@tonic-gate  *	ipcl_bind_fanout:	contains all connections in BOUND state
750Sstevel@tonic-gate  *	ipcl_proto_fanout:	IPv4 protocol fanout
760Sstevel@tonic-gate  *	ipcl_proto_fanout_v6:	IPv6 protocol fanout
770Sstevel@tonic-gate  *	ipcl_udp_fanout:	contains all UDP connections
780Sstevel@tonic-gate  *	ipcl_globalhash_fanout:	contains all connections
790Sstevel@tonic-gate  *
800Sstevel@tonic-gate  * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
810Sstevel@tonic-gate  * which need to view all existing connections.
820Sstevel@tonic-gate  *
830Sstevel@tonic-gate  * All tables are protected by per-bucket locks. When both per-bucket lock and
840Sstevel@tonic-gate  * connection lock need to be held, the per-bucket lock should be acquired
850Sstevel@tonic-gate  * first, followed by the connection lock.
860Sstevel@tonic-gate  *
870Sstevel@tonic-gate  * All functions doing search in one of these tables increment a reference
880Sstevel@tonic-gate  * counter on the connection found (if any). This reference should be dropped
890Sstevel@tonic-gate  * when the caller has finished processing the connection.
900Sstevel@tonic-gate  *
910Sstevel@tonic-gate  *
920Sstevel@tonic-gate  * INTERFACES:
930Sstevel@tonic-gate  * ===========
940Sstevel@tonic-gate  *
950Sstevel@tonic-gate  * Connection Lookup:
960Sstevel@tonic-gate  * ------------------
970Sstevel@tonic-gate  *
980Sstevel@tonic-gate  * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, zoneid)
990Sstevel@tonic-gate  * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, zoneid)
1000Sstevel@tonic-gate  *
1010Sstevel@tonic-gate  * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
1020Sstevel@tonic-gate  * it can't find any associated connection. If the connection is found, its
1030Sstevel@tonic-gate  * reference counter is incremented.
1040Sstevel@tonic-gate  *
1050Sstevel@tonic-gate  *	mp:	mblock, containing packet header. The full header should fit
1060Sstevel@tonic-gate  *		into a single mblock. It should also contain at least full IP
1070Sstevel@tonic-gate  *		and TCP or UDP header.
1080Sstevel@tonic-gate  *
1090Sstevel@tonic-gate  *	protocol: Either IPPROTO_TCP or IPPROTO_UDP.
1100Sstevel@tonic-gate  *
1110Sstevel@tonic-gate  *	hdr_len: The size of IP header. It is used to find TCP or UDP header in
1120Sstevel@tonic-gate  *		 the packet.
1130Sstevel@tonic-gate  *
1140Sstevel@tonic-gate  * 	zoneid: The zone in which the returned connection must be.
1150Sstevel@tonic-gate  *
1160Sstevel@tonic-gate  *	For TCP connections, the lookup order is as follows:
1170Sstevel@tonic-gate  *		5-tuple {src, dst, protocol, local port, remote port}
1180Sstevel@tonic-gate  *			lookup in ipcl_conn_fanout table.
1190Sstevel@tonic-gate  *		3-tuple {dst, remote port, protocol} lookup in
1200Sstevel@tonic-gate  *			ipcl_bind_fanout table.
1210Sstevel@tonic-gate  *
1220Sstevel@tonic-gate  *	For UDP connections, a 5-tuple {src, dst, protocol, local port,
1230Sstevel@tonic-gate  *	remote port} lookup is done on ipcl_udp_fanout. Note that,
1240Sstevel@tonic-gate  *	these interfaces do not handle cases where a packet belongs
1250Sstevel@tonic-gate  *	to multiple UDP clients, which is handled in IP itself.
1260Sstevel@tonic-gate  *
1270Sstevel@tonic-gate  * conn_t	*ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcph_t *, int);
1280Sstevel@tonic-gate  * conn_t	*ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t);
1290Sstevel@tonic-gate  *
1300Sstevel@tonic-gate  *	Lookup routine to find an exact match for {src, dst, local port,
1310Sstevel@tonic-gate  *	remote port} for TCP connections in ipcl_conn_fanout. The addresses and
1320Sstevel@tonic-gate  *	ports are read from the IP and TCP header respectively.
1330Sstevel@tonic-gate  *
1340Sstevel@tonic-gate  * conn_t	*ipcl_lookup_listener_v4(lport, laddr, protocol);
1350Sstevel@tonic-gate  * conn_t	*ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex);
1360Sstevel@tonic-gate  *
1370Sstevel@tonic-gate  * 	Lookup routine to find a listener with the tuple {lport, laddr,
1380Sstevel@tonic-gate  * 	protocol} in the ipcl_bind_fanout table. For IPv6, an additional
1390Sstevel@tonic-gate  * 	parameter interface index is also compared.
1400Sstevel@tonic-gate  *
1410Sstevel@tonic-gate  * void ipcl_walk(func, arg)
1420Sstevel@tonic-gate  *
1430Sstevel@tonic-gate  * 	Apply 'func' to every connection available. The 'func' is called as
1440Sstevel@tonic-gate  *	(*func)(connp, arg). The walk is non-atomic so connections may be
1450Sstevel@tonic-gate  *	created and destroyed during the walk. The CONN_CONDEMNED and
1460Sstevel@tonic-gate  *	CONN_INCIPIENT flags ensure that connections which are newly created
1470Sstevel@tonic-gate  *	or being destroyed are not selected by the walker.
1480Sstevel@tonic-gate  *
1490Sstevel@tonic-gate  * Table Updates
1500Sstevel@tonic-gate  * -------------
1510Sstevel@tonic-gate  *
1520Sstevel@tonic-gate  * int ipcl_conn_insert(connp, protocol, src, dst, ports)
1530Sstevel@tonic-gate  * int ipcl_conn_insert_v6(connp, protocol, src, dst, ports, ifindex)
1540Sstevel@tonic-gate  *
1550Sstevel@tonic-gate  *	Insert 'connp' in the ipcl_conn_fanout.
1560Sstevel@tonic-gate  *	Arguments :
1570Sstevel@tonic-gate  *		connp		conn_t to be inserted
1580Sstevel@tonic-gate  *		protocol	connection protocol
1590Sstevel@tonic-gate  *		src		source address
1600Sstevel@tonic-gate  *		dst		destination address
1610Sstevel@tonic-gate  *		ports		local and remote port
1620Sstevel@tonic-gate  *		ifindex		interface index for IPv6 connections
1630Sstevel@tonic-gate  *
1640Sstevel@tonic-gate  *	Return value :
1650Sstevel@tonic-gate  *		0		if connp was inserted
1660Sstevel@tonic-gate  *		EADDRINUSE	if the connection with the same tuple
1670Sstevel@tonic-gate  *				already exists.
1680Sstevel@tonic-gate  *
1690Sstevel@tonic-gate  * int ipcl_bind_insert(connp, protocol, src, lport);
1700Sstevel@tonic-gate  * int ipcl_bind_insert_v6(connp, protocol, src, lport);
1710Sstevel@tonic-gate  *
1720Sstevel@tonic-gate  * 	Insert 'connp' in ipcl_bind_fanout.
1730Sstevel@tonic-gate  * 	Arguments :
1740Sstevel@tonic-gate  * 		connp		conn_t to be inserted
1750Sstevel@tonic-gate  * 		protocol	connection protocol
1760Sstevel@tonic-gate  * 		src		source address connection wants
1770Sstevel@tonic-gate  * 				to bind to
1780Sstevel@tonic-gate  * 		lport		local port connection wants to
1790Sstevel@tonic-gate  * 				bind to
1800Sstevel@tonic-gate  *
1810Sstevel@tonic-gate  *
1820Sstevel@tonic-gate  * void ipcl_hash_remove(connp);
1830Sstevel@tonic-gate  *
1840Sstevel@tonic-gate  * 	Removes the 'connp' from the connection fanout table.
1850Sstevel@tonic-gate  *
1860Sstevel@tonic-gate  * Connection Creation/Destruction
1870Sstevel@tonic-gate  * -------------------------------
1880Sstevel@tonic-gate  *
1890Sstevel@tonic-gate  * conn_t *ipcl_conn_create(type, sleep)
1900Sstevel@tonic-gate  *
1910Sstevel@tonic-gate  * 	Creates a new conn based on the type flag, inserts it into
1920Sstevel@tonic-gate  * 	globalhash table.
1930Sstevel@tonic-gate  *
1940Sstevel@tonic-gate  *	type:	This flag determines the type of conn_t which needs to be
1950Sstevel@tonic-gate  *		created.
1960Sstevel@tonic-gate  *		IPCL_TCPCONN	indicates a TCP connection
1970Sstevel@tonic-gate  *		IPCL_IPCCONN	indicates all non-TCP connections.
1980Sstevel@tonic-gate  *
1990Sstevel@tonic-gate  * void ipcl_conn_destroy(connp)
2000Sstevel@tonic-gate  *
2010Sstevel@tonic-gate  * 	Destroys the connection state, removes it from the global
2020Sstevel@tonic-gate  * 	connection hash table and frees its memory.
2030Sstevel@tonic-gate  */
2040Sstevel@tonic-gate 
2050Sstevel@tonic-gate #include <sys/types.h>
2060Sstevel@tonic-gate #include <sys/stream.h>
2070Sstevel@tonic-gate #include <sys/dlpi.h>
2080Sstevel@tonic-gate #include <sys/stropts.h>
2090Sstevel@tonic-gate #include <sys/sysmacros.h>
2100Sstevel@tonic-gate #include <sys/strsubr.h>
2110Sstevel@tonic-gate #include <sys/strlog.h>
2120Sstevel@tonic-gate #include <sys/strsun.h>
2130Sstevel@tonic-gate #define	_SUN_TPI_VERSION 2
2140Sstevel@tonic-gate #include <sys/ddi.h>
2150Sstevel@tonic-gate #include <sys/cmn_err.h>
2160Sstevel@tonic-gate #include <sys/debug.h>
2170Sstevel@tonic-gate 
2180Sstevel@tonic-gate #include <sys/systm.h>
2190Sstevel@tonic-gate #include <sys/param.h>
2200Sstevel@tonic-gate #include <sys/kmem.h>
2210Sstevel@tonic-gate #include <sys/isa_defs.h>
2220Sstevel@tonic-gate #include <inet/common.h>
2230Sstevel@tonic-gate #include <netinet/ip6.h>
2240Sstevel@tonic-gate #include <netinet/icmp6.h>
2250Sstevel@tonic-gate 
2260Sstevel@tonic-gate #include <inet/ip.h>
2270Sstevel@tonic-gate #include <inet/ip6.h>
2280Sstevel@tonic-gate #include <inet/tcp.h>
2290Sstevel@tonic-gate #include <inet/tcp_trace.h>
2300Sstevel@tonic-gate #include <inet/ip_multi.h>
2310Sstevel@tonic-gate #include <inet/ip_if.h>
2320Sstevel@tonic-gate #include <inet/ip_ire.h>
2330Sstevel@tonic-gate #include <inet/ip_rts.h>
2340Sstevel@tonic-gate #include <inet/optcom.h>
2350Sstevel@tonic-gate #include <inet/ip_ndp.h>
2360Sstevel@tonic-gate #include <inet/sctp_ip.h>
2370Sstevel@tonic-gate 
2380Sstevel@tonic-gate #include <sys/ethernet.h>
2390Sstevel@tonic-gate #include <net/if_types.h>
2400Sstevel@tonic-gate #include <sys/cpuvar.h>
2410Sstevel@tonic-gate 
2420Sstevel@tonic-gate #include <inet/mi.h>
2430Sstevel@tonic-gate #include <inet/ipclassifier.h>
2440Sstevel@tonic-gate #include <inet/ipsec_impl.h>
2450Sstevel@tonic-gate 
/*
 * Classifier tracing (IPCL_DEBUG) is compiled in only when the kernel is
 * built with DEBUG.
 */
#ifdef DEBUG
#define	IPCL_DEBUG
#else
#undef	IPCL_DEBUG
#endif

#ifdef	IPCL_DEBUG
/* Mask tested by IPCL_DEBUG_LVL(); the default of 0 prints nothing. */
int	ipcl_debug_level = 0;
/*
 * IPCL_DEBUG_LVL(level, args): call printf with 'args' (a parenthesized
 * printf argument list) when 'level' has a bit in common with
 * ipcl_debug_level.
 *
 * NOTE(review): the expansion is a bare 'if' statement and 'level' is not
 * parenthesized, so use it only as a full statement inside braces and pass
 * a simple constant for 'level'.
 */
#define	IPCL_DEBUG_LVL(level, args)	\
	if (ipcl_debug_level  & level) { printf args; }
#else
/* Tracing compiled out: the macro expands to an empty block. */
#define	IPCL_DEBUG_LVL(level, args) {; }
#endif
/*
 * Fanout (hash) tables.  The pointer tables are allocated in ipcl_init();
 * the two protocol fanouts are fixed-size arrays with IPPROTO_MAX + 1
 * entries each.
 */
connf_t	*ipcl_conn_fanout;
connf_t	*ipcl_bind_fanout;
connf_t	ipcl_proto_fanout[IPPROTO_MAX + 1];
connf_t	ipcl_proto_fanout_v6[IPPROTO_MAX + 1];
connf_t	*ipcl_udp_fanout;

/* A separate hash list for raw socket. */
connf_t *ipcl_raw_fanout;

/* Single list head; only its connf_lock is set up in this part of the file. */
connf_t rts_clients;

/*
 * Old value for compatibility.  Consulted by ipcl_init() only when
 * ipcl_conn_hash_size is zero.
 */
uint_t tcp_conn_hash_size = 0;

/*
 * New value. Zero means choose automatically: ipcl_init() then asks for one
 * bucket per ipcl_conn_hash_memfactor bytes of free memory, limited to
 * ipcl_conn_hash_maxsize buckets.
 */
uint_t ipcl_conn_hash_size = 0;
uint_t ipcl_conn_hash_memfactor = 8192;
uint_t ipcl_conn_hash_maxsize = 82500;

/* Number of buckets in ipcl_conn_fanout; computed by ipcl_init(). */
uint_t ipcl_conn_fanout_size = 0;


/* bind/udp fanout table size */
uint_t ipcl_bind_fanout_size = 512;
uint_t ipcl_udp_fanout_size = 256;

/* Raw socket fanout size.  Must be a power of 2. */
uint_t ipcl_raw_fanout_size = 256;
2870Sstevel@tonic-gate 
/*
 * Table of primes useful as hash table sizes, indexed by N for N of 0-28:
 * entry N is the nearest prime <= 2^N - 2^(N-2) (for example, entry 4 is
 * 11 and entry 16 is 49139).  Entries 0-2 are 0, and a trailing 0 marks the
 * end of the table.
 */

#define	P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,	\
		6143, 12281, 24571, 49139, 98299, 196597, 393209,	\
		786431, 1572853, 3145721, 6291449, 12582893, 25165813,	\
		50331599, 100663291, 201326557, 0}
2970Sstevel@tonic-gate 
/*
 * Wrapper structure to ensure that conn+tcpb are aligned on cache lines.
 *
 * This is the object type of ipcl_tcpconn_cache: a conn_t followed by its
 * tcp_t.  The conn_t shares a union with a filler of CACHE_ALIGN(conn_s)
 * bytes, so the tcp_t starts at that offset from the conn_t.  Because the
 * conn_t is the first member, a pointer to it is also a pointer to the
 * enclosing itc_t.
 */
typedef struct itc_s {
	union {
		conn_t	itcu_conn;
		char	itcu_filler[CACHE_ALIGN(conn_s)];
	}	itc_u;
	tcp_t	itc_tcp;
} itc_t;

/* Shorthand for the conn_t embedded in an itc_t. */
#define	itc_conn	itc_u.itcu_conn
3110Sstevel@tonic-gate 
/*
 * kmem caches for connection state.  ipcl_tcpconn_cache holds itc_t objects
 * (conn_t plus tcp_t) and ipcl_conn_cache holds bare conn_t objects; both
 * are created in ipcl_init().
 */
struct kmem_cache  *ipcl_tcpconn_cache;
/* NOTE(review): not referenced in this part of the file. */
struct kmem_cache  *ipcl_tcp_cache;
struct kmem_cache  *ipcl_conn_cache;
/* Caches defined outside this file. */
extern struct kmem_cache  *sctp_conn_cache;
extern struct kmem_cache  *tcp_sack_info_cache;
extern struct kmem_cache  *tcp_iphc_cache;

extern void	tcp_timermp_free(tcp_t *);
extern mblk_t	*tcp_timermp_alloc(int);

/* Constructor/destructor callbacks registered for ipcl_tcpconn_cache. */
static int	ipcl_tcpconn_constructor(void *, void *, int);
static void	ipcl_tcpconn_destructor(void *, void *);

/* NOTE(review): conn_g_index is not referenced in this part of the file. */
static int conn_g_index;
/* Hash table of all connections; CONN_G_HASH_SIZE buckets. */
connf_t	*ipcl_globalhash_fanout;
3270Sstevel@tonic-gate 
3280Sstevel@tonic-gate #ifdef	IPCL_DEBUG
3290Sstevel@tonic-gate #define	INET_NTOA_BUFSIZE	18
3300Sstevel@tonic-gate 
/*
 * Format the four bytes of 'in', taken in memory order, as a dotted-decimal
 * string in the caller-supplied buffer 'b' and return 'b'.  The buffer must
 * have room for the longest result, "255.255.255.255" plus the terminator.
 */
static char *
inet_ntoa_r(uint32_t in, char *b)
{
	const unsigned char	*octet = (const unsigned char *)&in;

	(void) sprintf(b, "%d.%d.%d.%d",
	    octet[0], octet[1], octet[2], octet[3]);
	return (b);
}
3400Sstevel@tonic-gate #endif
3410Sstevel@tonic-gate 
3420Sstevel@tonic-gate /*
3430Sstevel@tonic-gate  * ipclassifier intialization routine, sets up hash tables and
3440Sstevel@tonic-gate  * conn caches.
3450Sstevel@tonic-gate  */
3460Sstevel@tonic-gate void
3470Sstevel@tonic-gate ipcl_init(void)
3480Sstevel@tonic-gate {
3490Sstevel@tonic-gate 	int i;
3500Sstevel@tonic-gate 	int sizes[] = P2Ps();
3510Sstevel@tonic-gate 
3520Sstevel@tonic-gate 	ipcl_conn_cache = kmem_cache_create("ipcl_conn_cache",
3530Sstevel@tonic-gate 	    sizeof (conn_t), CACHE_ALIGN_SIZE,
3540Sstevel@tonic-gate 	    NULL, NULL,
3550Sstevel@tonic-gate 	    NULL, NULL, NULL, 0);
3560Sstevel@tonic-gate 
3570Sstevel@tonic-gate 	ipcl_tcpconn_cache = kmem_cache_create("ipcl_tcpconn_cache",
3580Sstevel@tonic-gate 	    sizeof (itc_t), CACHE_ALIGN_SIZE,
3590Sstevel@tonic-gate 	    ipcl_tcpconn_constructor, ipcl_tcpconn_destructor,
3600Sstevel@tonic-gate 	    NULL, NULL, NULL, 0);
3610Sstevel@tonic-gate 
3620Sstevel@tonic-gate 	/*
3630Sstevel@tonic-gate 	 * Calculate size of conn fanout table.
3640Sstevel@tonic-gate 	 */
3650Sstevel@tonic-gate 	if (ipcl_conn_hash_size != 0) {
3660Sstevel@tonic-gate 		ipcl_conn_fanout_size = ipcl_conn_hash_size;
3670Sstevel@tonic-gate 	} else if (tcp_conn_hash_size != 0) {
3680Sstevel@tonic-gate 		ipcl_conn_fanout_size = tcp_conn_hash_size;
3690Sstevel@tonic-gate 	} else {
3700Sstevel@tonic-gate 		extern pgcnt_t freemem;
3710Sstevel@tonic-gate 
3720Sstevel@tonic-gate 		ipcl_conn_fanout_size =
3730Sstevel@tonic-gate 		    (freemem * PAGESIZE) / ipcl_conn_hash_memfactor;
3740Sstevel@tonic-gate 
3750Sstevel@tonic-gate 		if (ipcl_conn_fanout_size > ipcl_conn_hash_maxsize)
3760Sstevel@tonic-gate 			ipcl_conn_fanout_size = ipcl_conn_hash_maxsize;
3770Sstevel@tonic-gate 	}
3780Sstevel@tonic-gate 
3790Sstevel@tonic-gate 	for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) {
3800Sstevel@tonic-gate 		if (sizes[i] >= ipcl_conn_fanout_size) {
3810Sstevel@tonic-gate 			break;
3820Sstevel@tonic-gate 		}
3830Sstevel@tonic-gate 	}
3840Sstevel@tonic-gate 	if ((ipcl_conn_fanout_size = sizes[i]) == 0) {
3850Sstevel@tonic-gate 		/* Out of range, use the 2^16 value */
3860Sstevel@tonic-gate 		ipcl_conn_fanout_size = sizes[16];
3870Sstevel@tonic-gate 	}
3880Sstevel@tonic-gate 	ipcl_conn_fanout = (connf_t *)kmem_zalloc(ipcl_conn_fanout_size *
3890Sstevel@tonic-gate 	    sizeof (*ipcl_conn_fanout), KM_SLEEP);
3900Sstevel@tonic-gate 
3910Sstevel@tonic-gate 	for (i = 0; i < ipcl_conn_fanout_size; i++) {
3920Sstevel@tonic-gate 		mutex_init(&ipcl_conn_fanout[i].connf_lock, NULL,
3930Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
3940Sstevel@tonic-gate 	}
3950Sstevel@tonic-gate 
3960Sstevel@tonic-gate 	ipcl_bind_fanout = (connf_t *)kmem_zalloc(ipcl_bind_fanout_size *
3970Sstevel@tonic-gate 	    sizeof (*ipcl_bind_fanout), KM_SLEEP);
3980Sstevel@tonic-gate 
3990Sstevel@tonic-gate 	for (i = 0; i < ipcl_bind_fanout_size; i++) {
4000Sstevel@tonic-gate 		mutex_init(&ipcl_bind_fanout[i].connf_lock, NULL,
4010Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4020Sstevel@tonic-gate 	}
4030Sstevel@tonic-gate 
4040Sstevel@tonic-gate 	for (i = 0; i < A_CNT(ipcl_proto_fanout); i++) {
4050Sstevel@tonic-gate 		mutex_init(&ipcl_proto_fanout[i].connf_lock, NULL,
4060Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4070Sstevel@tonic-gate 	}
4080Sstevel@tonic-gate 	for (i = 0; i < A_CNT(ipcl_proto_fanout_v6); i++) {
4090Sstevel@tonic-gate 		mutex_init(&ipcl_proto_fanout_v6[i].connf_lock, NULL,
4100Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4110Sstevel@tonic-gate 	}
4120Sstevel@tonic-gate 
4130Sstevel@tonic-gate 	mutex_init(&rts_clients.connf_lock, NULL, MUTEX_DEFAULT, NULL);
4140Sstevel@tonic-gate 
4150Sstevel@tonic-gate 	ipcl_udp_fanout = (connf_t *)kmem_zalloc(ipcl_udp_fanout_size *
4160Sstevel@tonic-gate 	    sizeof (*ipcl_udp_fanout), KM_SLEEP);
4170Sstevel@tonic-gate 
4180Sstevel@tonic-gate 	for (i = 0; i < ipcl_udp_fanout_size; i++) {
4190Sstevel@tonic-gate 		mutex_init(&ipcl_udp_fanout[i].connf_lock, NULL,
4200Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4210Sstevel@tonic-gate 	}
4220Sstevel@tonic-gate 
4230Sstevel@tonic-gate 	ipcl_raw_fanout = (connf_t *)kmem_zalloc(ipcl_raw_fanout_size *
4240Sstevel@tonic-gate 	    sizeof (*ipcl_raw_fanout), KM_SLEEP);
4250Sstevel@tonic-gate 
4260Sstevel@tonic-gate 	for (i = 0; i < ipcl_raw_fanout_size; i++) {
4270Sstevel@tonic-gate 		mutex_init(&ipcl_raw_fanout[i].connf_lock, NULL,
4280Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4290Sstevel@tonic-gate 	}
4300Sstevel@tonic-gate 
4310Sstevel@tonic-gate 	ipcl_globalhash_fanout = (connf_t *)kmem_zalloc(sizeof (connf_t) *
4320Sstevel@tonic-gate 	    CONN_G_HASH_SIZE, KM_SLEEP);
4330Sstevel@tonic-gate 
4340Sstevel@tonic-gate 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
4350Sstevel@tonic-gate 		mutex_init(&ipcl_globalhash_fanout[i].connf_lock, NULL,
4360Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4370Sstevel@tonic-gate 	}
4380Sstevel@tonic-gate }
4390Sstevel@tonic-gate 
4400Sstevel@tonic-gate void
4410Sstevel@tonic-gate ipcl_destroy(void)
4420Sstevel@tonic-gate {
4430Sstevel@tonic-gate 	int i;
4440Sstevel@tonic-gate 	kmem_cache_destroy(ipcl_conn_cache);
4450Sstevel@tonic-gate 	kmem_cache_destroy(ipcl_tcpconn_cache);
4460Sstevel@tonic-gate 	for (i = 0; i < ipcl_conn_fanout_size; i++)
4470Sstevel@tonic-gate 		mutex_destroy(&ipcl_conn_fanout[i].connf_lock);
4480Sstevel@tonic-gate 	kmem_free(ipcl_conn_fanout, ipcl_conn_fanout_size *
4490Sstevel@tonic-gate 	    sizeof (*ipcl_conn_fanout));
4500Sstevel@tonic-gate 	for (i = 0; i < ipcl_bind_fanout_size; i++)
4510Sstevel@tonic-gate 		mutex_destroy(&ipcl_bind_fanout[i].connf_lock);
4520Sstevel@tonic-gate 	kmem_free(ipcl_bind_fanout, ipcl_bind_fanout_size *
4530Sstevel@tonic-gate 	    sizeof (*ipcl_bind_fanout));
4540Sstevel@tonic-gate 
4550Sstevel@tonic-gate 	for (i = 0; i < A_CNT(ipcl_proto_fanout); i++)
4560Sstevel@tonic-gate 		mutex_destroy(&ipcl_proto_fanout[i].connf_lock);
4570Sstevel@tonic-gate 	for (i = 0; i < A_CNT(ipcl_proto_fanout_v6); i++)
4580Sstevel@tonic-gate 		mutex_destroy(&ipcl_proto_fanout_v6[i].connf_lock);
4590Sstevel@tonic-gate 
4600Sstevel@tonic-gate 	for (i = 0; i < ipcl_udp_fanout_size; i++)
4610Sstevel@tonic-gate 		mutex_destroy(&ipcl_udp_fanout[i].connf_lock);
4620Sstevel@tonic-gate 	kmem_free(ipcl_udp_fanout, ipcl_udp_fanout_size *
4630Sstevel@tonic-gate 	    sizeof (*ipcl_udp_fanout));
4640Sstevel@tonic-gate 
4650Sstevel@tonic-gate 	for (i = 0; i < ipcl_raw_fanout_size; i++)
4660Sstevel@tonic-gate 		mutex_destroy(&ipcl_raw_fanout[i].connf_lock);
4670Sstevel@tonic-gate 	kmem_free(ipcl_raw_fanout, ipcl_raw_fanout_size *
4680Sstevel@tonic-gate 	    sizeof (*ipcl_raw_fanout));
4690Sstevel@tonic-gate 
4700Sstevel@tonic-gate 	kmem_free(ipcl_globalhash_fanout, sizeof (connf_t) * CONN_G_HASH_SIZE);
4710Sstevel@tonic-gate 	mutex_destroy(&rts_clients.connf_lock);
4720Sstevel@tonic-gate }
4730Sstevel@tonic-gate 
4740Sstevel@tonic-gate /*
4750Sstevel@tonic-gate  * conn creation routine. initialize the conn, sets the reference
4760Sstevel@tonic-gate  * and inserts it in the global hash table.
4770Sstevel@tonic-gate  */
4780Sstevel@tonic-gate conn_t *
4790Sstevel@tonic-gate ipcl_conn_create(uint32_t type, int sleep)
4800Sstevel@tonic-gate {
4810Sstevel@tonic-gate 	itc_t	*itc;
4820Sstevel@tonic-gate 	conn_t	*connp;
4830Sstevel@tonic-gate 
4840Sstevel@tonic-gate 	switch (type) {
4850Sstevel@tonic-gate 	case IPCL_TCPCONN:
4860Sstevel@tonic-gate 		if ((itc = kmem_cache_alloc(ipcl_tcpconn_cache,
4870Sstevel@tonic-gate 		    sleep)) == NULL)
4880Sstevel@tonic-gate 			return (NULL);
4890Sstevel@tonic-gate 		connp = &itc->itc_conn;
4900Sstevel@tonic-gate 		connp->conn_ref = 1;
4910Sstevel@tonic-gate 		IPCL_DEBUG_LVL(1,
4920Sstevel@tonic-gate 		    ("ipcl_conn_create: connp = %p tcp (%p)",
4930Sstevel@tonic-gate 		    (void *)connp, (void *)connp->conn_tcp));
4940Sstevel@tonic-gate 		ipcl_globalhash_insert(connp);
4950Sstevel@tonic-gate 		break;
4960Sstevel@tonic-gate 	case IPCL_SCTPCONN:
4970Sstevel@tonic-gate 		if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL)
4980Sstevel@tonic-gate 			return (NULL);
4990Sstevel@tonic-gate 		connp->conn_flags = IPCL_SCTPCONN;
5000Sstevel@tonic-gate 		break;
5010Sstevel@tonic-gate 	case IPCL_IPCCONN:
5020Sstevel@tonic-gate 		connp = kmem_cache_alloc(ipcl_conn_cache, sleep);
5030Sstevel@tonic-gate 		if (connp == NULL)
5040Sstevel@tonic-gate 			return (connp);
5050Sstevel@tonic-gate 		bzero(connp, sizeof (conn_t));
5060Sstevel@tonic-gate 		mutex_init(&connp->conn_lock, NULL,
5070Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
5080Sstevel@tonic-gate 		cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
5090Sstevel@tonic-gate 		connp->conn_flags |= IPCL_IPCCONN;
5100Sstevel@tonic-gate 		connp->conn_ref = 1;
5110Sstevel@tonic-gate 		IPCL_DEBUG_LVL(1,
5120Sstevel@tonic-gate 		    ("ipcl_conn_create: connp = %p\n", (void *)connp));
5130Sstevel@tonic-gate 		ipcl_globalhash_insert(connp);
5140Sstevel@tonic-gate 		break;
5150Sstevel@tonic-gate 	}
5160Sstevel@tonic-gate 
5170Sstevel@tonic-gate 	return (connp);
5180Sstevel@tonic-gate }
5190Sstevel@tonic-gate 
5200Sstevel@tonic-gate void
5210Sstevel@tonic-gate ipcl_conn_destroy(conn_t *connp)
5220Sstevel@tonic-gate {
5230Sstevel@tonic-gate 	mblk_t	*mp;
5240Sstevel@tonic-gate 	tcp_t	*tcp = connp->conn_tcp;
5250Sstevel@tonic-gate 
5260Sstevel@tonic-gate 	ASSERT(!MUTEX_HELD(&connp->conn_lock));
5270Sstevel@tonic-gate 	ASSERT(connp->conn_ref == 0);
5280Sstevel@tonic-gate 	ASSERT(connp->conn_ire_cache == NULL);
5290Sstevel@tonic-gate 
5300Sstevel@tonic-gate 	ipcl_globalhash_remove(connp);
5310Sstevel@tonic-gate 
5320Sstevel@tonic-gate 	cv_destroy(&connp->conn_cv);
5330Sstevel@tonic-gate 	if (connp->conn_flags & IPCL_TCPCONN) {
5340Sstevel@tonic-gate 		mutex_destroy(&connp->conn_lock);
5350Sstevel@tonic-gate 		ASSERT(connp->conn_tcp != NULL);
5360Sstevel@tonic-gate 		tcp_free(tcp);
5370Sstevel@tonic-gate 		mp = tcp->tcp_timercache;
5380Sstevel@tonic-gate 
5390Sstevel@tonic-gate 		if (tcp->tcp_sack_info != NULL) {
5400Sstevel@tonic-gate 			bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t));
5410Sstevel@tonic-gate 			kmem_cache_free(tcp_sack_info_cache,
5420Sstevel@tonic-gate 			    tcp->tcp_sack_info);
5430Sstevel@tonic-gate 		}
5440Sstevel@tonic-gate 		if (tcp->tcp_iphc != NULL) {
5450Sstevel@tonic-gate 			if (tcp->tcp_hdr_grown) {
5460Sstevel@tonic-gate 				kmem_free(tcp->tcp_iphc, tcp->tcp_iphc_len);
5470Sstevel@tonic-gate 			} else {
5480Sstevel@tonic-gate 				bzero(tcp->tcp_iphc, tcp->tcp_iphc_len);
5490Sstevel@tonic-gate 				kmem_cache_free(tcp_iphc_cache, tcp->tcp_iphc);
5500Sstevel@tonic-gate 			}
5510Sstevel@tonic-gate 			tcp->tcp_iphc_len = 0;
5520Sstevel@tonic-gate 		}
5530Sstevel@tonic-gate 		ASSERT(tcp->tcp_iphc_len == 0);
5540Sstevel@tonic-gate 
5550Sstevel@tonic-gate 		if (connp->conn_latch != NULL)
5560Sstevel@tonic-gate 			IPLATCH_REFRELE(connp->conn_latch);
5570Sstevel@tonic-gate 		if (connp->conn_policy != NULL)
5580Sstevel@tonic-gate 			IPPH_REFRELE(connp->conn_policy);
5590Sstevel@tonic-gate 		bzero(connp, sizeof (itc_t));
5600Sstevel@tonic-gate 
5610Sstevel@tonic-gate 		tcp->tcp_timercache = mp;
5620Sstevel@tonic-gate 		connp->conn_tcp = tcp;
5630Sstevel@tonic-gate 		connp->conn_flags = IPCL_TCPCONN;
5640Sstevel@tonic-gate 		connp->conn_ulp = IPPROTO_TCP;
5650Sstevel@tonic-gate 		tcp->tcp_connp = connp;
5660Sstevel@tonic-gate 		kmem_cache_free(ipcl_tcpconn_cache, connp);
5670Sstevel@tonic-gate 	} else if (connp->conn_flags & IPCL_SCTPCONN) {
5680Sstevel@tonic-gate 		sctp_free(connp);
5690Sstevel@tonic-gate 	} else {
5700Sstevel@tonic-gate 		mutex_destroy(&connp->conn_lock);
5710Sstevel@tonic-gate 		kmem_cache_free(ipcl_conn_cache, connp);
5720Sstevel@tonic-gate 	}
5730Sstevel@tonic-gate }
5740Sstevel@tonic-gate 
5750Sstevel@tonic-gate /*
5760Sstevel@tonic-gate  * Running in cluster mode - deregister listener information
5770Sstevel@tonic-gate  */
5780Sstevel@tonic-gate 
5790Sstevel@tonic-gate static void
5800Sstevel@tonic-gate ipcl_conn_unlisten(conn_t *connp)
5810Sstevel@tonic-gate {
5820Sstevel@tonic-gate 	ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0);
5830Sstevel@tonic-gate 	ASSERT(connp->conn_lport != 0);
5840Sstevel@tonic-gate 
5850Sstevel@tonic-gate 	if (cl_inet_unlisten != NULL) {
5860Sstevel@tonic-gate 		sa_family_t	addr_family;
5870Sstevel@tonic-gate 		uint8_t		*laddrp;
5880Sstevel@tonic-gate 
5890Sstevel@tonic-gate 		if (connp->conn_pkt_isv6) {
5900Sstevel@tonic-gate 			addr_family = AF_INET6;
5910Sstevel@tonic-gate 			laddrp = (uint8_t *)&connp->conn_bound_source_v6;
5920Sstevel@tonic-gate 		} else {
5930Sstevel@tonic-gate 			addr_family = AF_INET;
5940Sstevel@tonic-gate 			laddrp = (uint8_t *)&connp->conn_bound_source;
5950Sstevel@tonic-gate 		}
5960Sstevel@tonic-gate 		(*cl_inet_unlisten)(IPPROTO_TCP, addr_family, laddrp,
5970Sstevel@tonic-gate 		    connp->conn_lport);
5980Sstevel@tonic-gate 	}
5990Sstevel@tonic-gate 	connp->conn_flags &= ~IPCL_CL_LISTENER;
6000Sstevel@tonic-gate }
6010Sstevel@tonic-gate 
/*
 * IPCL_HASH_REMOVE(connp): unlink 'connp' from the fanout bucket recorded
 * in conn_fanout.  A conn whose conn_fanout is NULL is left untouched.
 *
 * With the bucket's connf_lock held, the conn is spliced out of the doubly
 * linked list (updating connf_head when it was first), its conn_fanout,
 * conn_next and conn_prev are cleared, a cluster listener registration is
 * withdrawn through ipcl_conn_unlisten() when IPCL_CL_LISTENER is set, and
 * one reference is dropped with CONN_DEC_REF().  The caller must not hold
 * conn_lock.
 *
 * We set the IPCL_REMOVED flag (instead of clearing the flag indicating
 * which table the conn belonged to). So for debugging we can see which hash
 * table this connection was in.
 *
 * NOTE(review): 'connp' is evaluated many times, so pass an expression
 * without side effects.
 */
#define	IPCL_HASH_REMOVE(connp)	{					\
	connf_t	*connfp = (connp)->conn_fanout;				\
	ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));			\
	if (connfp != NULL) {						\
		IPCL_DEBUG_LVL(4, ("IPCL_HASH_REMOVE: connp %p",	\
		    (void *)(connp)));					\
		mutex_enter(&connfp->connf_lock);			\
		if ((connp)->conn_next != NULL)				\
			(connp)->conn_next->conn_prev =			\
			    (connp)->conn_prev;				\
		if ((connp)->conn_prev != NULL)				\
			(connp)->conn_prev->conn_next =			\
			    (connp)->conn_next;				\
		else							\
			connfp->connf_head = (connp)->conn_next;	\
		(connp)->conn_fanout = NULL;				\
		(connp)->conn_next = NULL;				\
		(connp)->conn_prev = NULL;				\
		(connp)->conn_flags |= IPCL_REMOVED;			\
		if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0)	\
			ipcl_conn_unlisten((connp));			\
		CONN_DEC_REF((connp));					\
		mutex_exit(&connfp->connf_lock);			\
	}								\
}
6320Sstevel@tonic-gate 
6330Sstevel@tonic-gate void
6340Sstevel@tonic-gate ipcl_hash_remove(conn_t *connp)
6350Sstevel@tonic-gate {
6360Sstevel@tonic-gate 	IPCL_HASH_REMOVE(connp);
6370Sstevel@tonic-gate }
6380Sstevel@tonic-gate 
6390Sstevel@tonic-gate /*
6400Sstevel@tonic-gate  * The whole purpose of this function is allow removal of
6410Sstevel@tonic-gate  * a conn_t from the connected hash for timewait reclaim.
6420Sstevel@tonic-gate  * This is essentially a TW reclaim fastpath where timewait
6430Sstevel@tonic-gate  * collector checks under fanout lock (so no one else can
6440Sstevel@tonic-gate  * get access to the conn_t) that refcnt is 2 i.e. one for
6450Sstevel@tonic-gate  * TCP and one for the classifier hash list. If ref count
6460Sstevel@tonic-gate  * is indeed 2, we can just remove the conn under lock and
6470Sstevel@tonic-gate  * avoid cleaning up the conn under squeue. This gives us
6480Sstevel@tonic-gate  * improved performance.
6490Sstevel@tonic-gate  */
6500Sstevel@tonic-gate void
6510Sstevel@tonic-gate ipcl_hash_remove_locked(conn_t *connp, connf_t	*connfp)
6520Sstevel@tonic-gate {
6530Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connfp->connf_lock));
6540Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connp->conn_lock));
6550Sstevel@tonic-gate 	ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0);
6560Sstevel@tonic-gate 
6570Sstevel@tonic-gate 	if ((connp)->conn_next != NULL) {
6580Sstevel@tonic-gate 		(connp)->conn_next->conn_prev =
6590Sstevel@tonic-gate 			(connp)->conn_prev;
6600Sstevel@tonic-gate 	}
6610Sstevel@tonic-gate 	if ((connp)->conn_prev != NULL) {
6620Sstevel@tonic-gate 		(connp)->conn_prev->conn_next =
6630Sstevel@tonic-gate 			(connp)->conn_next;
6640Sstevel@tonic-gate 	} else {
6650Sstevel@tonic-gate 		connfp->connf_head = (connp)->conn_next;
6660Sstevel@tonic-gate 	}
6670Sstevel@tonic-gate 	(connp)->conn_fanout = NULL;
6680Sstevel@tonic-gate 	(connp)->conn_next = NULL;
6690Sstevel@tonic-gate 	(connp)->conn_prev = NULL;
6700Sstevel@tonic-gate 	(connp)->conn_flags |= IPCL_REMOVED;
6710Sstevel@tonic-gate 	ASSERT((connp)->conn_ref == 2);
6720Sstevel@tonic-gate 	(connp)->conn_ref--;
6730Sstevel@tonic-gate }
6740Sstevel@tonic-gate 
/*
 * Push connp onto the front of bucket connfp and mark it IPCL_CONNECTED
 * (clearing IPCL_REMOVED).  The bucket lock is not taken here; the conn
 * must not currently be linked into any bucket.  Takes one conn reference.
 */
#define	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) {		\
	conn_t	*oldhead = (connfp)->connf_head;			\
	ASSERT((connp)->conn_fanout == NULL);				\
	ASSERT((connp)->conn_next == NULL);				\
	ASSERT((connp)->conn_prev == NULL);				\
	if (oldhead != NULL) {						\
		oldhead->conn_prev = (connp);				\
		(connp)->conn_next = oldhead;				\
	}								\
	(connfp)->connf_head = (connp);					\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = IPCL_CONNECTED |				\
	    ((connp)->conn_flags & ~IPCL_REMOVED);			\
	CONN_INC_REF(connp);						\
}
6890Sstevel@tonic-gate 
/*
 * Move connp to the front of bucket connfp as a connected entry: first
 * unhash it from wherever it is now, then insert it under connfp's lock.
 */
#define	IPCL_HASH_INSERT_CONNECTED(connfp, connp) {			\
	IPCL_DEBUG_LVL(8, ("IPCL_HASH_INSERT_CONNECTED: connfp %p "	\
	    "connp %p", (void *)(connfp), (void *)(connp)));		\
	/* Drop any previous hash membership before re-inserting. */	\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);		\
	mutex_exit(&(connfp)->connf_lock);				\
}
6980Sstevel@tonic-gate 
/*
 * Insert connp, which is bound to a specific local address, into bucket
 * connfp.  The conn is first unhashed from wherever it is, then linked in
 * immediately ahead of the first entry whose source address satisfies
 * _IPCL_V4_MATCH_ANY(), or at the tail when no such entry exists.  The
 * conn is flagged IPCL_BOUND (IPCL_REMOVED cleared) and one reference is
 * taken.
 */
#define	IPCL_HASH_INSERT_BOUND(connfp, connp) {				\
	conn_t *bprev = NULL, *bnext;					\
	IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_BOUND: connfp %p "	\
	    "connp %p", (void *)connfp, (void *)(connp)));		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	/* Find the insertion point: bprev precedes it, bnext follows. */ \
	for (bnext = (connfp)->connf_head; bnext != NULL;		\
	    bnext = bnext->conn_next) {					\
		if (_IPCL_V4_MATCH_ANY(bnext->conn_srcv6))		\
			break;						\
		bprev = bnext;						\
	}								\
	if (bprev == NULL) {						\
		(connfp)->connf_head = (connp);				\
	} else {							\
		bprev->conn_next = (connp);				\
		(connp)->conn_prev = bprev;				\
	}								\
	if (bnext != NULL) {						\
		(connp)->conn_next = bnext;				\
		bnext->conn_prev = (connp);				\
	}								\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = IPCL_BOUND |				\
	    ((connp)->conn_flags & ~IPCL_REMOVED);			\
	CONN_INC_REF(connp);						\
	mutex_exit(&(connfp)->connf_lock);				\
}
7270Sstevel@tonic-gate 
/*
 * Insert connp, which has a wildcard local address, into bucket connfp.
 *
 * The conn is first unhashed from wherever it is.  If its source address is
 * IPv4-mapped, it is linked in ahead of the first entry in the same zone
 * whose source address is the IPv6 unspecified address; in every other case
 * it is appended at the tail.  The conn is flagged IPCL_BOUND (IPCL_REMOVED
 * cleared) and one reference is taken.
 *
 * `list' always points at the link that must be made to point at connp
 * (the bucket head or the previous entry's conn_next), and `prev' is the
 * entry owning that link (NULL for the head).
 */
#define	IPCL_HASH_INSERT_WILDCARD(connfp, connp) {			\
	conn_t **list, *prev, *next;					\
	boolean_t isv4mapped =						\
	    IN6_IS_ADDR_V4MAPPED(&(connp)->conn_srcv6);			\
	IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_WILDCARD: connfp %p "	\
	    "connp %p", (void *)(connfp), (void *)(connp)));		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	list = &(connfp)->connf_head;					\
	prev = NULL;							\
	while ((next = *list) != NULL) {				\
		if (isv4mapped &&					\
		    IN6_IS_ADDR_UNSPECIFIED(&next->conn_srcv6) &&	\
		    (connp)->conn_zoneid == next->conn_zoneid) {	\
			/* Splice in front of `next'. */		\
			(connp)->conn_next = next;			\
			next->conn_prev = (connp);			\
			break;						\
		}							\
		list = &next->conn_next;				\
		prev = next;						\
	}								\
	(connp)->conn_prev = prev;					\
	*list = (connp);						\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF((connp));						\
	mutex_exit(&(connfp)->connf_lock);				\
}
7590Sstevel@tonic-gate 
7600Sstevel@tonic-gate void
7610Sstevel@tonic-gate ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp)
7620Sstevel@tonic-gate {
7630Sstevel@tonic-gate 	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
7640Sstevel@tonic-gate }
7650Sstevel@tonic-gate 
7660Sstevel@tonic-gate void
7670Sstevel@tonic-gate ipcl_proto_insert(conn_t *connp, uint8_t protocol)
7680Sstevel@tonic-gate {
7690Sstevel@tonic-gate 	connf_t	*connfp;
7700Sstevel@tonic-gate 
7710Sstevel@tonic-gate 	ASSERT(connp != NULL);
7720Sstevel@tonic-gate 
7730Sstevel@tonic-gate 	connp->conn_ulp = protocol;
7740Sstevel@tonic-gate 
7750Sstevel@tonic-gate 	/* Insert it in the protocol hash */
7760Sstevel@tonic-gate 	connfp = &ipcl_proto_fanout[protocol];
7770Sstevel@tonic-gate 	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
7780Sstevel@tonic-gate }
7790Sstevel@tonic-gate 
7800Sstevel@tonic-gate void
7810Sstevel@tonic-gate ipcl_proto_insert_v6(conn_t *connp, uint8_t protocol)
7820Sstevel@tonic-gate {
7830Sstevel@tonic-gate 	connf_t	*connfp;
7840Sstevel@tonic-gate 
7850Sstevel@tonic-gate 	ASSERT(connp != NULL);
7860Sstevel@tonic-gate 
7870Sstevel@tonic-gate 	connp->conn_ulp = protocol;
7880Sstevel@tonic-gate 
7890Sstevel@tonic-gate 	/* Insert it in the Bind Hash */
7900Sstevel@tonic-gate 	connfp = &ipcl_proto_fanout_v6[protocol];
7910Sstevel@tonic-gate 	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
7920Sstevel@tonic-gate }
7930Sstevel@tonic-gate 
7940Sstevel@tonic-gate /*
7950Sstevel@tonic-gate  * This function is used only for inserting SCTP raw socket now.
7960Sstevel@tonic-gate  * This may change later.
7970Sstevel@tonic-gate  *
7980Sstevel@tonic-gate  * Note that only one raw socket can be bound to a port.  The param
7990Sstevel@tonic-gate  * lport is in network byte order.
8000Sstevel@tonic-gate  */
8010Sstevel@tonic-gate static int
8020Sstevel@tonic-gate ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport)
8030Sstevel@tonic-gate {
8040Sstevel@tonic-gate 	connf_t	*connfp;
8050Sstevel@tonic-gate 	conn_t	*oconnp;
8060Sstevel@tonic-gate 
8070Sstevel@tonic-gate 	connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport))];
8080Sstevel@tonic-gate 
8090Sstevel@tonic-gate 	/* Check for existing raw socket already bound to the port. */
8100Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
8110Sstevel@tonic-gate 	for (oconnp = connfp->connf_head; oconnp != NULL;
8120Sstevel@tonic-gate 	    oconnp = connp->conn_next) {
8130Sstevel@tonic-gate 		if (oconnp->conn_lport == lport &&
8140Sstevel@tonic-gate 		    oconnp->conn_zoneid == connp->conn_zoneid &&
8150Sstevel@tonic-gate 		    oconnp->conn_af_isv6 == connp->conn_af_isv6 &&
8160Sstevel@tonic-gate 		    ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) ||
8170Sstevel@tonic-gate 		    IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_srcv6) ||
8180Sstevel@tonic-gate 		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6) ||
8190Sstevel@tonic-gate 		    IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_srcv6)) ||
8200Sstevel@tonic-gate 		    IN6_ARE_ADDR_EQUAL(&oconnp->conn_srcv6,
8210Sstevel@tonic-gate 		    &connp->conn_srcv6))) {
8220Sstevel@tonic-gate 			break;
8230Sstevel@tonic-gate 		}
8240Sstevel@tonic-gate 	}
8250Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
8260Sstevel@tonic-gate 	if (oconnp != NULL)
8270Sstevel@tonic-gate 		return (EADDRNOTAVAIL);
8280Sstevel@tonic-gate 
8290Sstevel@tonic-gate 	if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) ||
8300Sstevel@tonic-gate 	    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_remv6)) {
8310Sstevel@tonic-gate 		if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) ||
8320Sstevel@tonic-gate 		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6)) {
8330Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
8340Sstevel@tonic-gate 		} else {
8350Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
8360Sstevel@tonic-gate 		}
8370Sstevel@tonic-gate 	} else {
8380Sstevel@tonic-gate 		IPCL_HASH_INSERT_CONNECTED(connfp, connp);
8390Sstevel@tonic-gate 	}
8400Sstevel@tonic-gate 	return (0);
8410Sstevel@tonic-gate }
8420Sstevel@tonic-gate 
8430Sstevel@tonic-gate /*
8440Sstevel@tonic-gate  * (v4, v6) bind hash insertion routines
8450Sstevel@tonic-gate  */
8460Sstevel@tonic-gate int
8470Sstevel@tonic-gate ipcl_bind_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, uint16_t lport)
8480Sstevel@tonic-gate {
8490Sstevel@tonic-gate 	connf_t	*connfp;
8500Sstevel@tonic-gate #ifdef	IPCL_DEBUG
8510Sstevel@tonic-gate 	char	buf[INET_NTOA_BUFSIZE];
8520Sstevel@tonic-gate #endif
8530Sstevel@tonic-gate 	int	ret = 0;
8540Sstevel@tonic-gate 
8550Sstevel@tonic-gate 	ASSERT(connp);
8560Sstevel@tonic-gate 
8570Sstevel@tonic-gate 	IPCL_DEBUG_LVL(64, ("ipcl_bind_insert: connp %p, src = %s, "
8580Sstevel@tonic-gate 	    "port = %d\n", (void *)connp, inet_ntoa_r(src, buf), lport));
8590Sstevel@tonic-gate 
8600Sstevel@tonic-gate 	connp->conn_ulp = protocol;
8610Sstevel@tonic-gate 	IN6_IPADDR_TO_V4MAPPED(src, &connp->conn_srcv6);
8620Sstevel@tonic-gate 	connp->conn_lport = lport;
8630Sstevel@tonic-gate 
8640Sstevel@tonic-gate 	switch (protocol) {
8650Sstevel@tonic-gate 	case IPPROTO_UDP:
8660Sstevel@tonic-gate 	default:
8670Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
8680Sstevel@tonic-gate 			IPCL_DEBUG_LVL(64,
8690Sstevel@tonic-gate 			    ("ipcl_bind_insert: connp %p - udp\n",
8700Sstevel@tonic-gate 			    (void *)connp));
8710Sstevel@tonic-gate 			connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)];
8720Sstevel@tonic-gate 		} else {
8730Sstevel@tonic-gate 			IPCL_DEBUG_LVL(64,
8740Sstevel@tonic-gate 			    ("ipcl_bind_insert: connp %p - protocol\n",
8750Sstevel@tonic-gate 			    (void *)connp));
8760Sstevel@tonic-gate 			connfp = &ipcl_proto_fanout[protocol];
8770Sstevel@tonic-gate 		}
8780Sstevel@tonic-gate 
8790Sstevel@tonic-gate 		if (connp->conn_rem != INADDR_ANY) {
8800Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
8810Sstevel@tonic-gate 		} else if (connp->conn_src != INADDR_ANY) {
8820Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
8830Sstevel@tonic-gate 		} else {
8840Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
8850Sstevel@tonic-gate 		}
8860Sstevel@tonic-gate 		break;
8870Sstevel@tonic-gate 
8880Sstevel@tonic-gate 	case IPPROTO_TCP:
8890Sstevel@tonic-gate 
8900Sstevel@tonic-gate 		/* Insert it in the Bind Hash */
8910Sstevel@tonic-gate 		connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)];
8920Sstevel@tonic-gate 		if (connp->conn_src != INADDR_ANY) {
8930Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
8940Sstevel@tonic-gate 		} else {
8950Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
8960Sstevel@tonic-gate 		}
8970Sstevel@tonic-gate 		if (cl_inet_listen != NULL) {
8980Sstevel@tonic-gate 			ASSERT(!connp->conn_pkt_isv6);
8990Sstevel@tonic-gate 			connp->conn_flags |= IPCL_CL_LISTENER;
9000Sstevel@tonic-gate 			(*cl_inet_listen)(IPPROTO_TCP, AF_INET,
9010Sstevel@tonic-gate 			    (uint8_t *)&connp->conn_bound_source, lport);
9020Sstevel@tonic-gate 		}
9030Sstevel@tonic-gate 		break;
9040Sstevel@tonic-gate 
9050Sstevel@tonic-gate 	case IPPROTO_SCTP:
9060Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
9070Sstevel@tonic-gate 		break;
9080Sstevel@tonic-gate 	}
9090Sstevel@tonic-gate 
9100Sstevel@tonic-gate 	return (ret);
9110Sstevel@tonic-gate }
9120Sstevel@tonic-gate 
9130Sstevel@tonic-gate int
9140Sstevel@tonic-gate ipcl_bind_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src,
9150Sstevel@tonic-gate     uint16_t lport)
9160Sstevel@tonic-gate {
9170Sstevel@tonic-gate 	connf_t	*connfp;
9180Sstevel@tonic-gate 	int	ret = 0;
9190Sstevel@tonic-gate 
9200Sstevel@tonic-gate 	ASSERT(connp);
9210Sstevel@tonic-gate 
9220Sstevel@tonic-gate 	connp->conn_ulp = protocol;
9230Sstevel@tonic-gate 	connp->conn_srcv6 = *src;
9240Sstevel@tonic-gate 	connp->conn_lport = lport;
9250Sstevel@tonic-gate 
9260Sstevel@tonic-gate 	switch (protocol) {
9270Sstevel@tonic-gate 	case IPPROTO_UDP:
9280Sstevel@tonic-gate 	default:
9290Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
9300Sstevel@tonic-gate 			IPCL_DEBUG_LVL(128,
9310Sstevel@tonic-gate 			    ("ipcl_bind_insert_v6: connp %p - udp\n",
9320Sstevel@tonic-gate 			    (void *)connp));
9330Sstevel@tonic-gate 			connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)];
9340Sstevel@tonic-gate 		} else {
9350Sstevel@tonic-gate 			IPCL_DEBUG_LVL(128,
9360Sstevel@tonic-gate 			    ("ipcl_bind_insert_v6: connp %p - protocol\n",
9370Sstevel@tonic-gate 			    (void *)connp));
9380Sstevel@tonic-gate 			connfp = &ipcl_proto_fanout_v6[protocol];
9390Sstevel@tonic-gate 		}
9400Sstevel@tonic-gate 
9410Sstevel@tonic-gate 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) {
9420Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
9430Sstevel@tonic-gate 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
9440Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
9450Sstevel@tonic-gate 		} else {
9460Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
9470Sstevel@tonic-gate 		}
9480Sstevel@tonic-gate 		break;
9490Sstevel@tonic-gate 
9500Sstevel@tonic-gate 	case IPPROTO_TCP:
9510Sstevel@tonic-gate 		/* XXX - Need a separate table for IN6_IS_ADDR_UNSPECIFIED? */
9520Sstevel@tonic-gate 
9530Sstevel@tonic-gate 		/* Insert it in the Bind Hash */
9540Sstevel@tonic-gate 		connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)];
9550Sstevel@tonic-gate 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
9560Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
9570Sstevel@tonic-gate 		} else {
9580Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
9590Sstevel@tonic-gate 		}
9600Sstevel@tonic-gate 		if (cl_inet_listen != NULL) {
9610Sstevel@tonic-gate 			sa_family_t	addr_family;
9620Sstevel@tonic-gate 			uint8_t		*laddrp;
9630Sstevel@tonic-gate 
9640Sstevel@tonic-gate 			if (connp->conn_pkt_isv6) {
9650Sstevel@tonic-gate 				addr_family = AF_INET6;
9660Sstevel@tonic-gate 				laddrp =
9670Sstevel@tonic-gate 				    (uint8_t *)&connp->conn_bound_source_v6;
9680Sstevel@tonic-gate 			} else {
9690Sstevel@tonic-gate 				addr_family = AF_INET;
9700Sstevel@tonic-gate 				laddrp = (uint8_t *)&connp->conn_bound_source;
9710Sstevel@tonic-gate 			}
9720Sstevel@tonic-gate 			connp->conn_flags |= IPCL_CL_LISTENER;
9730Sstevel@tonic-gate 			(*cl_inet_listen)(IPPROTO_TCP, addr_family, laddrp,
9740Sstevel@tonic-gate 			    lport);
9750Sstevel@tonic-gate 		}
9760Sstevel@tonic-gate 		break;
9770Sstevel@tonic-gate 
9780Sstevel@tonic-gate 	case IPPROTO_SCTP:
9790Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
9800Sstevel@tonic-gate 		break;
9810Sstevel@tonic-gate 	}
9820Sstevel@tonic-gate 
9830Sstevel@tonic-gate 	return (ret);
9840Sstevel@tonic-gate }
9850Sstevel@tonic-gate 
9860Sstevel@tonic-gate /*
9870Sstevel@tonic-gate  * ipcl_conn_hash insertion routines.
9880Sstevel@tonic-gate  */
9890Sstevel@tonic-gate int
9900Sstevel@tonic-gate ipcl_conn_insert(conn_t *connp, uint8_t protocol, ipaddr_t src,
9910Sstevel@tonic-gate     ipaddr_t rem, uint32_t ports)
9920Sstevel@tonic-gate {
9930Sstevel@tonic-gate 	connf_t		*connfp;
9940Sstevel@tonic-gate 	uint16_t	*up;
9950Sstevel@tonic-gate 	conn_t		*tconnp;
9960Sstevel@tonic-gate #ifdef	IPCL_DEBUG
9970Sstevel@tonic-gate 	char	sbuf[INET_NTOA_BUFSIZE], rbuf[INET_NTOA_BUFSIZE];
9980Sstevel@tonic-gate #endif
9990Sstevel@tonic-gate 	in_port_t	lport;
10000Sstevel@tonic-gate 	int		ret = 0;
10010Sstevel@tonic-gate 
10020Sstevel@tonic-gate 	IPCL_DEBUG_LVL(256, ("ipcl_conn_insert: connp %p, src = %s, "
10030Sstevel@tonic-gate 	    "dst = %s, ports = %x, protocol = %x", (void *)connp,
10040Sstevel@tonic-gate 	    inet_ntoa_r(src, sbuf), inet_ntoa_r(rem, rbuf),
10050Sstevel@tonic-gate 	    ports, protocol));
10060Sstevel@tonic-gate 
10070Sstevel@tonic-gate 	switch (protocol) {
10080Sstevel@tonic-gate 	case IPPROTO_TCP:
10090Sstevel@tonic-gate 		if (!(connp->conn_flags & IPCL_EAGER)) {
10100Sstevel@tonic-gate 			/*
10110Sstevel@tonic-gate 			 * for a eager connection, i.e connections which
10120Sstevel@tonic-gate 			 * have just been created, the initialization is
10130Sstevel@tonic-gate 			 * already done in ip at conn_creation time, so
10140Sstevel@tonic-gate 			 * we can skip the checks here.
10150Sstevel@tonic-gate 			 */
10160Sstevel@tonic-gate 			IPCL_CONN_INIT(connp, protocol, src, rem, ports);
10170Sstevel@tonic-gate 		}
10180Sstevel@tonic-gate 		connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(connp->conn_rem,
10190Sstevel@tonic-gate 		    connp->conn_ports)];
10200Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
10210Sstevel@tonic-gate 		for (tconnp = connfp->connf_head; tconnp != NULL;
10220Sstevel@tonic-gate 		    tconnp = tconnp->conn_next) {
10230Sstevel@tonic-gate 			if (IPCL_CONN_MATCH(tconnp, connp->conn_ulp,
10240Sstevel@tonic-gate 			    connp->conn_rem, connp->conn_src,
10250Sstevel@tonic-gate 			    connp->conn_ports)) {
10260Sstevel@tonic-gate 
10270Sstevel@tonic-gate 				/* Already have a conn. bail out */
10280Sstevel@tonic-gate 				mutex_exit(&connfp->connf_lock);
10290Sstevel@tonic-gate 				return (EADDRINUSE);
10300Sstevel@tonic-gate 			}
10310Sstevel@tonic-gate 		}
10320Sstevel@tonic-gate 		if (connp->conn_fanout != NULL) {
10330Sstevel@tonic-gate 			/*
10340Sstevel@tonic-gate 			 * Probably a XTI/TLI application trying to do a
10350Sstevel@tonic-gate 			 * rebind. Let it happen.
10360Sstevel@tonic-gate 			 */
10370Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
10380Sstevel@tonic-gate 			IPCL_HASH_REMOVE(connp);
10390Sstevel@tonic-gate 			mutex_enter(&connfp->connf_lock);
10400Sstevel@tonic-gate 		}
10410Sstevel@tonic-gate 		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
10420Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
10430Sstevel@tonic-gate 		break;
10440Sstevel@tonic-gate 
10450Sstevel@tonic-gate 	case IPPROTO_SCTP:
10460Sstevel@tonic-gate 		lport = (uint16_t)(ntohl(ports) & 0xFFFF);
10470Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
10480Sstevel@tonic-gate 		break;
10490Sstevel@tonic-gate 
10500Sstevel@tonic-gate 	case IPPROTO_UDP:
10510Sstevel@tonic-gate 	default:
10520Sstevel@tonic-gate 		up = (uint16_t *)&ports;
10530Sstevel@tonic-gate 		IPCL_CONN_INIT(connp, protocol, src, rem, ports);
10540Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
10550Sstevel@tonic-gate 			connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(up[1])];
10560Sstevel@tonic-gate 		} else {
10570Sstevel@tonic-gate 			connfp = &ipcl_proto_fanout[protocol];
10580Sstevel@tonic-gate 		}
10590Sstevel@tonic-gate 
10600Sstevel@tonic-gate 		if (connp->conn_rem != INADDR_ANY) {
10610Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
10620Sstevel@tonic-gate 		} else if (connp->conn_src != INADDR_ANY) {
10630Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
10640Sstevel@tonic-gate 		} else {
10650Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
10660Sstevel@tonic-gate 		}
10670Sstevel@tonic-gate 		break;
10680Sstevel@tonic-gate 	}
10690Sstevel@tonic-gate 
10700Sstevel@tonic-gate 	return (ret);
10710Sstevel@tonic-gate }
10720Sstevel@tonic-gate 
10730Sstevel@tonic-gate int
10740Sstevel@tonic-gate ipcl_conn_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src,
10750Sstevel@tonic-gate     const in6_addr_t *rem, uint32_t ports, uint_t ifindex)
10760Sstevel@tonic-gate {
10770Sstevel@tonic-gate 	connf_t		*connfp;
10780Sstevel@tonic-gate 	uint16_t	*up;
10790Sstevel@tonic-gate 	conn_t		*tconnp;
10800Sstevel@tonic-gate 	in_port_t	lport;
10810Sstevel@tonic-gate 	int		ret = 0;
10820Sstevel@tonic-gate 
10830Sstevel@tonic-gate 	switch (protocol) {
10840Sstevel@tonic-gate 	case IPPROTO_TCP:
10850Sstevel@tonic-gate 		/* Just need to insert a conn struct */
10860Sstevel@tonic-gate 		if (!(connp->conn_flags & IPCL_EAGER)) {
10870Sstevel@tonic-gate 			IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports);
10880Sstevel@tonic-gate 		}
10890Sstevel@tonic-gate 		connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(connp->conn_remv6,
10900Sstevel@tonic-gate 		    connp->conn_ports)];
10910Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
10920Sstevel@tonic-gate 		for (tconnp = connfp->connf_head; tconnp != NULL;
10930Sstevel@tonic-gate 		    tconnp = tconnp->conn_next) {
10940Sstevel@tonic-gate 			if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_ulp,
10950Sstevel@tonic-gate 			    connp->conn_remv6, connp->conn_srcv6,
10960Sstevel@tonic-gate 			    connp->conn_ports) &&
10970Sstevel@tonic-gate 			    (tconnp->conn_tcp->tcp_bound_if == 0 ||
10980Sstevel@tonic-gate 			    tconnp->conn_tcp->tcp_bound_if == ifindex)) {
10990Sstevel@tonic-gate 				/* Already have a conn. bail out */
11000Sstevel@tonic-gate 				mutex_exit(&connfp->connf_lock);
11010Sstevel@tonic-gate 				return (EADDRINUSE);
11020Sstevel@tonic-gate 			}
11030Sstevel@tonic-gate 		}
11040Sstevel@tonic-gate 		if (connp->conn_fanout != NULL) {
11050Sstevel@tonic-gate 			/*
11060Sstevel@tonic-gate 			 * Probably a XTI/TLI application trying to do a
11070Sstevel@tonic-gate 			 * rebind. Let it happen.
11080Sstevel@tonic-gate 			 */
11090Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
11100Sstevel@tonic-gate 			IPCL_HASH_REMOVE(connp);
11110Sstevel@tonic-gate 			mutex_enter(&connfp->connf_lock);
11120Sstevel@tonic-gate 		}
11130Sstevel@tonic-gate 		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
11140Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
11150Sstevel@tonic-gate 		break;
11160Sstevel@tonic-gate 
11170Sstevel@tonic-gate 	case IPPROTO_SCTP:
11180Sstevel@tonic-gate 		lport = (uint16_t)(ntohl(ports) & 0xFFFF);
11190Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
11200Sstevel@tonic-gate 		break;
11210Sstevel@tonic-gate 
11220Sstevel@tonic-gate 	case IPPROTO_UDP:
11230Sstevel@tonic-gate 	default:
11240Sstevel@tonic-gate 		up = (uint16_t *)&ports;
11250Sstevel@tonic-gate 		IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports);
11260Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
11270Sstevel@tonic-gate 			connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(up[1])];
11280Sstevel@tonic-gate 		} else {
11290Sstevel@tonic-gate 			connfp = &ipcl_proto_fanout_v6[protocol];
11300Sstevel@tonic-gate 		}
11310Sstevel@tonic-gate 
11320Sstevel@tonic-gate 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) {
11330Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
11340Sstevel@tonic-gate 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
11350Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
11360Sstevel@tonic-gate 		} else {
11370Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
11380Sstevel@tonic-gate 		}
11390Sstevel@tonic-gate 		break;
11400Sstevel@tonic-gate 	}
11410Sstevel@tonic-gate 
11420Sstevel@tonic-gate 	return (ret);
11430Sstevel@tonic-gate }
11440Sstevel@tonic-gate 
11450Sstevel@tonic-gate /*
11460Sstevel@tonic-gate  * v4 packet classifying function. looks up the fanout table to
11470Sstevel@tonic-gate  * find the conn, the packet belongs to. returns the conn with
11480Sstevel@tonic-gate  * the reference held, null otherwise.
11490Sstevel@tonic-gate  */
11500Sstevel@tonic-gate conn_t *
11510Sstevel@tonic-gate ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid)
11520Sstevel@tonic-gate {
11530Sstevel@tonic-gate 	ipha_t	*ipha;
11540Sstevel@tonic-gate 	connf_t	*connfp, *bind_connfp;
11550Sstevel@tonic-gate 	uint16_t lport;
11560Sstevel@tonic-gate 	uint16_t fport;
11570Sstevel@tonic-gate 	uint32_t ports;
11580Sstevel@tonic-gate 	conn_t	*connp;
11590Sstevel@tonic-gate 	uint16_t  *up;
11600Sstevel@tonic-gate 
11610Sstevel@tonic-gate 	ipha = (ipha_t *)mp->b_rptr;
11620Sstevel@tonic-gate 	up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET);
11630Sstevel@tonic-gate 
11640Sstevel@tonic-gate 	switch (protocol) {
11650Sstevel@tonic-gate 	case IPPROTO_TCP:
11660Sstevel@tonic-gate 		ports = *(uint32_t *)up;
11670Sstevel@tonic-gate 		connfp =
11680Sstevel@tonic-gate 		    &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src, ports)];
11690Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
11700Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
11710Sstevel@tonic-gate 		    connp = connp->conn_next) {
11720Sstevel@tonic-gate 			if (IPCL_CONN_MATCH(connp, protocol,
11730Sstevel@tonic-gate 			    ipha->ipha_src, ipha->ipha_dst, ports))
11740Sstevel@tonic-gate 				break;
11750Sstevel@tonic-gate 		}
11760Sstevel@tonic-gate 
11770Sstevel@tonic-gate 		if (connp != NULL) {
11780Sstevel@tonic-gate 			CONN_INC_REF(connp);
11790Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
11800Sstevel@tonic-gate 			return (connp);
11810Sstevel@tonic-gate 		}
11820Sstevel@tonic-gate 
11830Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
11840Sstevel@tonic-gate 
11850Sstevel@tonic-gate 		lport = up[1];
11860Sstevel@tonic-gate 		bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)];
11870Sstevel@tonic-gate 		mutex_enter(&bind_connfp->connf_lock);
11880Sstevel@tonic-gate 		for (connp = bind_connfp->connf_head; connp != NULL;
11890Sstevel@tonic-gate 		    connp = connp->conn_next) {
11900Sstevel@tonic-gate 			if (IPCL_BIND_MATCH(connp, protocol,
11910Sstevel@tonic-gate 			    ipha->ipha_dst, lport) &&
11920Sstevel@tonic-gate 			    connp->conn_zoneid == zoneid)
11930Sstevel@tonic-gate 				break;
11940Sstevel@tonic-gate 		}
11950Sstevel@tonic-gate 
11960Sstevel@tonic-gate 		if (connp != NULL) {
11970Sstevel@tonic-gate 			/* Have a listner at least */
11980Sstevel@tonic-gate 			CONN_INC_REF(connp);
11990Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
12000Sstevel@tonic-gate 			return (connp);
12010Sstevel@tonic-gate 		}
12020Sstevel@tonic-gate 
12030Sstevel@tonic-gate 		mutex_exit(&bind_connfp->connf_lock);
12040Sstevel@tonic-gate 
12050Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512,
12060Sstevel@tonic-gate 		    ("ipcl_classify: couldn't classify mp = %p\n",
12070Sstevel@tonic-gate 		    (void *)mp));
12080Sstevel@tonic-gate 		break;
12090Sstevel@tonic-gate 
12100Sstevel@tonic-gate 	case IPPROTO_UDP:
12110Sstevel@tonic-gate 		lport = up[1];
12120Sstevel@tonic-gate 		fport = up[0];
12130Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512, ("ipcl_udp_classify %x %x", lport, fport));
12140Sstevel@tonic-gate 		connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)];
12150Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
12160Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
12170Sstevel@tonic-gate 		    connp = connp->conn_next) {
12180Sstevel@tonic-gate 			if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
12190Sstevel@tonic-gate 			    fport, ipha->ipha_src) &&
12200Sstevel@tonic-gate 			    connp->conn_zoneid == zoneid)
12210Sstevel@tonic-gate 				break;
12220Sstevel@tonic-gate 		}
12230Sstevel@tonic-gate 
12240Sstevel@tonic-gate 		if (connp != NULL) {
12250Sstevel@tonic-gate 			CONN_INC_REF(connp);
12260Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
12270Sstevel@tonic-gate 			return (connp);
12280Sstevel@tonic-gate 		}
12290Sstevel@tonic-gate 
12300Sstevel@tonic-gate 		/*
12310Sstevel@tonic-gate 		 * We shouldn't come here for multicast/broadcast packets
12320Sstevel@tonic-gate 		 */
12330Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
12340Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512,
12350Sstevel@tonic-gate 		    ("ipcl_classify: cant find udp conn_t for ports : %x %x",
12360Sstevel@tonic-gate 		    lport, fport));
12370Sstevel@tonic-gate 		break;
12380Sstevel@tonic-gate 	}
12390Sstevel@tonic-gate 
12400Sstevel@tonic-gate 	return (NULL);
12410Sstevel@tonic-gate }
12420Sstevel@tonic-gate 
12430Sstevel@tonic-gate conn_t *
12440Sstevel@tonic-gate ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid)
12450Sstevel@tonic-gate {
12460Sstevel@tonic-gate 	ip6_t		*ip6h;
12470Sstevel@tonic-gate 	connf_t		*connfp, *bind_connfp;
12480Sstevel@tonic-gate 	uint16_t	lport;
12490Sstevel@tonic-gate 	uint16_t	fport;
12500Sstevel@tonic-gate 	tcph_t		*tcph;
12510Sstevel@tonic-gate 	uint32_t	ports;
12520Sstevel@tonic-gate 	conn_t		*connp;
12530Sstevel@tonic-gate 	uint16_t	*up;
12540Sstevel@tonic-gate 
12550Sstevel@tonic-gate 
12560Sstevel@tonic-gate 	ip6h = (ip6_t *)mp->b_rptr;
12570Sstevel@tonic-gate 
12580Sstevel@tonic-gate 	switch (protocol) {
12590Sstevel@tonic-gate 	case IPPROTO_TCP:
12600Sstevel@tonic-gate 		tcph = (tcph_t *)&mp->b_rptr[hdr_len];
12610Sstevel@tonic-gate 		up = (uint16_t *)tcph->th_lport;
12620Sstevel@tonic-gate 		ports = *(uint32_t *)up;
12630Sstevel@tonic-gate 
12640Sstevel@tonic-gate 		connfp =
12650Sstevel@tonic-gate 		    &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src, ports)];
12660Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
12670Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
12680Sstevel@tonic-gate 		    connp = connp->conn_next) {
12690Sstevel@tonic-gate 			if (IPCL_CONN_MATCH_V6(connp, protocol,
12700Sstevel@tonic-gate 			    ip6h->ip6_src, ip6h->ip6_dst, ports))
12710Sstevel@tonic-gate 				break;
12720Sstevel@tonic-gate 		}
12730Sstevel@tonic-gate 
12740Sstevel@tonic-gate 		if (connp != NULL) {
12750Sstevel@tonic-gate 			CONN_INC_REF(connp);
12760Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
12770Sstevel@tonic-gate 			return (connp);
12780Sstevel@tonic-gate 		}
12790Sstevel@tonic-gate 
12800Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
12810Sstevel@tonic-gate 
12820Sstevel@tonic-gate 		lport = up[1];
12830Sstevel@tonic-gate 		bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)];
12840Sstevel@tonic-gate 		mutex_enter(&bind_connfp->connf_lock);
12850Sstevel@tonic-gate 		for (connp = bind_connfp->connf_head; connp != NULL;
12860Sstevel@tonic-gate 		    connp = connp->conn_next) {
12870Sstevel@tonic-gate 			if (IPCL_BIND_MATCH_V6(connp, protocol,
12880Sstevel@tonic-gate 			    ip6h->ip6_dst, lport) &&
12890Sstevel@tonic-gate 			    connp->conn_zoneid == zoneid)
12900Sstevel@tonic-gate 				break;
12910Sstevel@tonic-gate 		}
12920Sstevel@tonic-gate 
12930Sstevel@tonic-gate 		if (connp != NULL) {
12940Sstevel@tonic-gate 			/* Have a listner at least */
12950Sstevel@tonic-gate 			CONN_INC_REF(connp);
12960Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
12970Sstevel@tonic-gate 			IPCL_DEBUG_LVL(512,
12980Sstevel@tonic-gate 			    ("ipcl_classify_v6: found listner "
12990Sstevel@tonic-gate 			    "connp = %p\n", (void *)connp));
13000Sstevel@tonic-gate 
13010Sstevel@tonic-gate 			return (connp);
13020Sstevel@tonic-gate 		}
13030Sstevel@tonic-gate 
13040Sstevel@tonic-gate 		mutex_exit(&bind_connfp->connf_lock);
13050Sstevel@tonic-gate 
13060Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512,
13070Sstevel@tonic-gate 		    ("ipcl_classify_v6: couldn't classify mp = %p\n",
13080Sstevel@tonic-gate 		    (void *)mp));
13090Sstevel@tonic-gate 		break;
13100Sstevel@tonic-gate 
13110Sstevel@tonic-gate 	case IPPROTO_UDP:
13120Sstevel@tonic-gate 		up = (uint16_t *)&mp->b_rptr[hdr_len];
13130Sstevel@tonic-gate 		lport = up[1];
13140Sstevel@tonic-gate 		fport = up[0];
13150Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512, ("ipcl_udp_classify_v6 %x %x", lport,
13160Sstevel@tonic-gate 		    fport));
13170Sstevel@tonic-gate 		connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)];
13180Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
13190Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
13200Sstevel@tonic-gate 		    connp = connp->conn_next) {
13210Sstevel@tonic-gate 			if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst,
13220Sstevel@tonic-gate 			    fport, ip6h->ip6_src) &&
13230Sstevel@tonic-gate 			    connp->conn_zoneid == zoneid)
13240Sstevel@tonic-gate 				break;
13250Sstevel@tonic-gate 		}
13260Sstevel@tonic-gate 
13270Sstevel@tonic-gate 		if (connp != NULL) {
13280Sstevel@tonic-gate 			CONN_INC_REF(connp);
13290Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
13300Sstevel@tonic-gate 			return (connp);
13310Sstevel@tonic-gate 		}
13320Sstevel@tonic-gate 
13330Sstevel@tonic-gate 		/*
13340Sstevel@tonic-gate 		 * We shouldn't come here for multicast/broadcast packets
13350Sstevel@tonic-gate 		 */
13360Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
13370Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512,
13380Sstevel@tonic-gate 		    ("ipcl_classify_v6: cant find udp conn_t for ports : %x %x",
13390Sstevel@tonic-gate 		    lport, fport));
13400Sstevel@tonic-gate 		break;
13410Sstevel@tonic-gate 	}
13420Sstevel@tonic-gate 
13430Sstevel@tonic-gate 
13440Sstevel@tonic-gate 	return (NULL);
13450Sstevel@tonic-gate }
13460Sstevel@tonic-gate 
13470Sstevel@tonic-gate /*
13480Sstevel@tonic-gate  * wrapper around ipcl_classify_(v4,v6) routines.
13490Sstevel@tonic-gate  */
13500Sstevel@tonic-gate conn_t *
13510Sstevel@tonic-gate ipcl_classify(mblk_t *mp, zoneid_t zoneid)
13520Sstevel@tonic-gate {
13530Sstevel@tonic-gate 	uint16_t	hdr_len;
13540Sstevel@tonic-gate 	ipha_t		*ipha;
13550Sstevel@tonic-gate 	uint8_t		*nexthdrp;
13560Sstevel@tonic-gate 
13570Sstevel@tonic-gate 	if (MBLKL(mp) < sizeof (ipha_t))
13580Sstevel@tonic-gate 		return (NULL);
13590Sstevel@tonic-gate 
13600Sstevel@tonic-gate 	switch (IPH_HDR_VERSION(mp->b_rptr)) {
13610Sstevel@tonic-gate 	case IPV4_VERSION:
13620Sstevel@tonic-gate 		ipha = (ipha_t *)mp->b_rptr;
13630Sstevel@tonic-gate 		hdr_len = IPH_HDR_LENGTH(ipha);
13640Sstevel@tonic-gate 		return (ipcl_classify_v4(mp, ipha->ipha_protocol, hdr_len,
13650Sstevel@tonic-gate 		    zoneid));
13660Sstevel@tonic-gate 	case IPV6_VERSION:
13670Sstevel@tonic-gate 		if (!ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr,
13680Sstevel@tonic-gate 		    &hdr_len, &nexthdrp))
13690Sstevel@tonic-gate 			return (NULL);
13700Sstevel@tonic-gate 
13710Sstevel@tonic-gate 		return (ipcl_classify_v6(mp, *nexthdrp, hdr_len, zoneid));
13720Sstevel@tonic-gate 	}
13730Sstevel@tonic-gate 
13740Sstevel@tonic-gate 	return (NULL);
13750Sstevel@tonic-gate }
13760Sstevel@tonic-gate 
13770Sstevel@tonic-gate conn_t *
13780Sstevel@tonic-gate ipcl_classify_raw(uint8_t protocol, zoneid_t zoneid, uint32_t ports,
13790Sstevel@tonic-gate     ipha_t *hdr)
13800Sstevel@tonic-gate {
13810Sstevel@tonic-gate 	struct connf_s	*connfp;
13820Sstevel@tonic-gate 	conn_t		*connp;
13830Sstevel@tonic-gate 	in_port_t	lport;
13840Sstevel@tonic-gate 	int		af;
13850Sstevel@tonic-gate 
13860Sstevel@tonic-gate 	lport = ((uint16_t *)&ports)[1];
13870Sstevel@tonic-gate 	af = IPH_HDR_VERSION(hdr);
13880Sstevel@tonic-gate 	connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport))];
13890Sstevel@tonic-gate 
13900Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
13910Sstevel@tonic-gate 	for (connp = connfp->connf_head; connp != NULL;
13920Sstevel@tonic-gate 	    connp = connp->conn_next) {
13930Sstevel@tonic-gate 		/* We don't allow v4 fallback for v6 raw socket. */
13940Sstevel@tonic-gate 		if ((af == (connp->conn_af_isv6 ? IPV4_VERSION :
13950Sstevel@tonic-gate 		    IPV6_VERSION)) || (connp->conn_zoneid != zoneid)) {
13960Sstevel@tonic-gate 			continue;
13970Sstevel@tonic-gate 		}
13980Sstevel@tonic-gate 		if (connp->conn_fully_bound) {
13990Sstevel@tonic-gate 			if (af == IPV4_VERSION) {
14000Sstevel@tonic-gate 				if (IPCL_CONN_MATCH(connp, protocol,
14010Sstevel@tonic-gate 				    hdr->ipha_src, hdr->ipha_dst, ports)) {
14020Sstevel@tonic-gate 					break;
14030Sstevel@tonic-gate 				}
14040Sstevel@tonic-gate 			} else {
14050Sstevel@tonic-gate 				if (IPCL_CONN_MATCH_V6(connp, protocol,
14060Sstevel@tonic-gate 				    ((ip6_t *)hdr)->ip6_src,
14070Sstevel@tonic-gate 				    ((ip6_t *)hdr)->ip6_dst, ports)) {
14080Sstevel@tonic-gate 					break;
14090Sstevel@tonic-gate 				}
14100Sstevel@tonic-gate 			}
14110Sstevel@tonic-gate 		} else {
14120Sstevel@tonic-gate 			if (af == IPV4_VERSION) {
14130Sstevel@tonic-gate 				if (IPCL_BIND_MATCH(connp, protocol,
14140Sstevel@tonic-gate 				    hdr->ipha_dst, lport)) {
14150Sstevel@tonic-gate 					break;
14160Sstevel@tonic-gate 				}
14170Sstevel@tonic-gate 			} else {
14180Sstevel@tonic-gate 				if (IPCL_BIND_MATCH_V6(connp, protocol,
14190Sstevel@tonic-gate 				    ((ip6_t *)hdr)->ip6_dst, lport)) {
14200Sstevel@tonic-gate 					break;
14210Sstevel@tonic-gate 				}
14220Sstevel@tonic-gate 			}
14230Sstevel@tonic-gate 		}
14240Sstevel@tonic-gate 	}
14250Sstevel@tonic-gate 	if (connp != NULL) {
14260Sstevel@tonic-gate 		CONN_INC_REF(connp);
14270Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
14280Sstevel@tonic-gate 		return (connp);
14290Sstevel@tonic-gate 	}
14300Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
14310Sstevel@tonic-gate 	return (NULL);
14320Sstevel@tonic-gate }
14330Sstevel@tonic-gate 
14340Sstevel@tonic-gate /* ARGSUSED */
14350Sstevel@tonic-gate static int
14360Sstevel@tonic-gate ipcl_tcpconn_constructor(void *buf, void *cdrarg, int kmflags)
14370Sstevel@tonic-gate {
14380Sstevel@tonic-gate 	itc_t	*itc = (itc_t *)buf;
14390Sstevel@tonic-gate 	conn_t 	*connp = &itc->itc_conn;
14400Sstevel@tonic-gate 	tcp_t	*tcp = &itc->itc_tcp;
14410Sstevel@tonic-gate 	bzero(itc, sizeof (itc_t));
14420Sstevel@tonic-gate 	tcp->tcp_timercache = tcp_timermp_alloc(KM_NOSLEEP);
14430Sstevel@tonic-gate 	connp->conn_tcp = tcp;
14440Sstevel@tonic-gate 	connp->conn_flags = IPCL_TCPCONN;
14450Sstevel@tonic-gate 	connp->conn_ulp = IPPROTO_TCP;
14460Sstevel@tonic-gate 	tcp->tcp_connp = connp;
14470Sstevel@tonic-gate 	return (0);
14480Sstevel@tonic-gate }
14490Sstevel@tonic-gate 
14500Sstevel@tonic-gate /* ARGSUSED */
14510Sstevel@tonic-gate static void
14520Sstevel@tonic-gate ipcl_tcpconn_destructor(void *buf, void *cdrarg)
14530Sstevel@tonic-gate {
14540Sstevel@tonic-gate 	tcp_timermp_free(((conn_t *)buf)->conn_tcp);
14550Sstevel@tonic-gate }
14560Sstevel@tonic-gate 
14570Sstevel@tonic-gate /*
14580Sstevel@tonic-gate  * All conns are inserted in a global multi-list for the benefit of
14590Sstevel@tonic-gate  * walkers. The walk is guaranteed to walk all open conns at the time
14600Sstevel@tonic-gate  * of the start of the walk exactly once. This property is needed to
14610Sstevel@tonic-gate  * achieve some cleanups during unplumb of interfaces. This is achieved
14620Sstevel@tonic-gate  * as follows.
14630Sstevel@tonic-gate  *
14640Sstevel@tonic-gate  * ipcl_conn_create and ipcl_conn_destroy are the only functions that
14650Sstevel@tonic-gate  * call the insert and delete functions below at creation and deletion
14660Sstevel@tonic-gate  * time respectively. The conn never moves or changes its position in this
14670Sstevel@tonic-gate  * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt
14680Sstevel@tonic-gate  * won't increase due to walkers, once the conn deletion has started. Note
14690Sstevel@tonic-gate  * that we can't remove the conn from the global list and then wait for
14700Sstevel@tonic-gate  * the refcnt to drop to zero, since walkers would then see a truncated
14710Sstevel@tonic-gate  * list. CONN_INCIPIENT ensures that walkers don't start looking at
14720Sstevel@tonic-gate  * conns until ip_open is ready to make them globally visible.
14730Sstevel@tonic-gate  * The global round robin multi-list locks are held only to get the
14740Sstevel@tonic-gate  * next member/insertion/deletion and contention should be negligible
14750Sstevel@tonic-gate  * if the multi-list is much greater than the number of cpus.
14760Sstevel@tonic-gate  */
14770Sstevel@tonic-gate void
14780Sstevel@tonic-gate ipcl_globalhash_insert(conn_t *connp)
14790Sstevel@tonic-gate {
14800Sstevel@tonic-gate 	int	index;
14810Sstevel@tonic-gate 
14820Sstevel@tonic-gate 	/*
14830Sstevel@tonic-gate 	 * No need for atomic here. Approximate even distribution
14840Sstevel@tonic-gate 	 * in the global lists is sufficient.
14850Sstevel@tonic-gate 	 */
14860Sstevel@tonic-gate 	conn_g_index++;
14870Sstevel@tonic-gate 	index = conn_g_index & (CONN_G_HASH_SIZE - 1);
14880Sstevel@tonic-gate 
14890Sstevel@tonic-gate 	connp->conn_g_prev = NULL;
14900Sstevel@tonic-gate 	/*
14910Sstevel@tonic-gate 	 * Mark as INCIPIENT, so that walkers will ignore this
14920Sstevel@tonic-gate 	 * for now, till ip_open is ready to make it visible globally.
14930Sstevel@tonic-gate 	 */
14940Sstevel@tonic-gate 	connp->conn_state_flags |= CONN_INCIPIENT;
14950Sstevel@tonic-gate 
14960Sstevel@tonic-gate 	/* Insert at the head of the list */
14970Sstevel@tonic-gate 	mutex_enter(&ipcl_globalhash_fanout[index].connf_lock);
14980Sstevel@tonic-gate 	connp->conn_g_next = ipcl_globalhash_fanout[index].connf_head;
14990Sstevel@tonic-gate 	if (connp->conn_g_next != NULL)
15000Sstevel@tonic-gate 		connp->conn_g_next->conn_g_prev = connp;
15010Sstevel@tonic-gate 	ipcl_globalhash_fanout[index].connf_head = connp;
15020Sstevel@tonic-gate 
15030Sstevel@tonic-gate 	/* The fanout bucket this conn points to */
15040Sstevel@tonic-gate 	connp->conn_g_fanout = &ipcl_globalhash_fanout[index];
15050Sstevel@tonic-gate 
15060Sstevel@tonic-gate 	mutex_exit(&ipcl_globalhash_fanout[index].connf_lock);
15070Sstevel@tonic-gate }
15080Sstevel@tonic-gate 
15090Sstevel@tonic-gate void
15100Sstevel@tonic-gate ipcl_globalhash_remove(conn_t *connp)
15110Sstevel@tonic-gate {
15120Sstevel@tonic-gate 	/*
15130Sstevel@tonic-gate 	 * We were never inserted in the global multi list.
15140Sstevel@tonic-gate 	 * IPCL_NONE variety is never inserted in the global multilist
15150Sstevel@tonic-gate 	 * since it is presumed to not need any cleanup and is transient.
15160Sstevel@tonic-gate 	 */
15170Sstevel@tonic-gate 	if (connp->conn_g_fanout == NULL)
15180Sstevel@tonic-gate 		return;
15190Sstevel@tonic-gate 
15200Sstevel@tonic-gate 	mutex_enter(&connp->conn_g_fanout->connf_lock);
15210Sstevel@tonic-gate 	if (connp->conn_g_prev != NULL)
15220Sstevel@tonic-gate 		connp->conn_g_prev->conn_g_next = connp->conn_g_next;
15230Sstevel@tonic-gate 	else
15240Sstevel@tonic-gate 		connp->conn_g_fanout->connf_head = connp->conn_g_next;
15250Sstevel@tonic-gate 	if (connp->conn_g_next != NULL)
15260Sstevel@tonic-gate 		connp->conn_g_next->conn_g_prev = connp->conn_g_prev;
15270Sstevel@tonic-gate 	mutex_exit(&connp->conn_g_fanout->connf_lock);
15280Sstevel@tonic-gate 
15290Sstevel@tonic-gate 	/* Better to stumble on a null pointer than to corrupt memory */
15300Sstevel@tonic-gate 	connp->conn_g_next = NULL;
15310Sstevel@tonic-gate 	connp->conn_g_prev = NULL;
15320Sstevel@tonic-gate }
15330Sstevel@tonic-gate 
15340Sstevel@tonic-gate /*
15350Sstevel@tonic-gate  * Walk the list of all conn_t's in the system, calling the function provided
15360Sstevel@tonic-gate  * with the specified argument for each.
15370Sstevel@tonic-gate  * Applies to both IPv4 and IPv6.
15380Sstevel@tonic-gate  *
15390Sstevel@tonic-gate  * IPCs may hold pointers to ipif/ill. To guard against stale pointers
15400Sstevel@tonic-gate  * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is
15410Sstevel@tonic-gate  * unplumbed or removed. New conn_t's that are created while we are walking
15420Sstevel@tonic-gate  * may be missed by this walk, because they are not necessarily inserted
15430Sstevel@tonic-gate  * at the tail of the list. They are new conn_t's and thus don't have any
15440Sstevel@tonic-gate  * stale pointers. The CONN_CLOSING flag ensures that no new reference
15450Sstevel@tonic-gate  * is created to the struct that is going away.
15460Sstevel@tonic-gate  */
15470Sstevel@tonic-gate void
15480Sstevel@tonic-gate ipcl_walk(pfv_t func, void *arg)
15490Sstevel@tonic-gate {
15500Sstevel@tonic-gate 	int	i;
15510Sstevel@tonic-gate 	conn_t	*connp;
15520Sstevel@tonic-gate 	conn_t	*prev_connp;
15530Sstevel@tonic-gate 
15540Sstevel@tonic-gate 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
15550Sstevel@tonic-gate 		mutex_enter(&ipcl_globalhash_fanout[i].connf_lock);
15560Sstevel@tonic-gate 		prev_connp = NULL;
15570Sstevel@tonic-gate 		connp = ipcl_globalhash_fanout[i].connf_head;
15580Sstevel@tonic-gate 		while (connp != NULL) {
15590Sstevel@tonic-gate 			mutex_enter(&connp->conn_lock);
15600Sstevel@tonic-gate 			if (connp->conn_state_flags &
15610Sstevel@tonic-gate 			    (CONN_CONDEMNED | CONN_INCIPIENT)) {
15620Sstevel@tonic-gate 				mutex_exit(&connp->conn_lock);
15630Sstevel@tonic-gate 				connp = connp->conn_g_next;
15640Sstevel@tonic-gate 				continue;
15650Sstevel@tonic-gate 			}
15660Sstevel@tonic-gate 			CONN_INC_REF_LOCKED(connp);
15670Sstevel@tonic-gate 			mutex_exit(&connp->conn_lock);
15680Sstevel@tonic-gate 			mutex_exit(&ipcl_globalhash_fanout[i].connf_lock);
15690Sstevel@tonic-gate 			(*func)(connp, arg);
15700Sstevel@tonic-gate 			if (prev_connp != NULL)
15710Sstevel@tonic-gate 				CONN_DEC_REF(prev_connp);
15720Sstevel@tonic-gate 			mutex_enter(&ipcl_globalhash_fanout[i].connf_lock);
15730Sstevel@tonic-gate 			prev_connp = connp;
15740Sstevel@tonic-gate 			connp = connp->conn_g_next;
15750Sstevel@tonic-gate 		}
15760Sstevel@tonic-gate 		mutex_exit(&ipcl_globalhash_fanout[i].connf_lock);
15770Sstevel@tonic-gate 		if (prev_connp != NULL)
15780Sstevel@tonic-gate 			CONN_DEC_REF(prev_connp);
15790Sstevel@tonic-gate 	}
15800Sstevel@tonic-gate }
15810Sstevel@tonic-gate 
15820Sstevel@tonic-gate /*
15830Sstevel@tonic-gate  * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on
15840Sstevel@tonic-gate  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
15850Sstevel@tonic-gate  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
15860Sstevel@tonic-gate  * (peer tcp in at least ESTABLISHED state).
15870Sstevel@tonic-gate  */
15880Sstevel@tonic-gate conn_t *
15890Sstevel@tonic-gate ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcph_t *tcph)
15900Sstevel@tonic-gate {
15910Sstevel@tonic-gate 	uint32_t ports;
15920Sstevel@tonic-gate 	uint16_t *pports = (uint16_t *)&ports;
15930Sstevel@tonic-gate 	connf_t	*connfp;
15940Sstevel@tonic-gate 	conn_t	*tconnp;
15950Sstevel@tonic-gate 	boolean_t zone_chk;
15960Sstevel@tonic-gate 
15970Sstevel@tonic-gate 	/*
15980Sstevel@tonic-gate 	 * If either the source of destination address is loopback, then
15990Sstevel@tonic-gate 	 * both endpoints must be in the same Zone.  Otherwise, both of
16000Sstevel@tonic-gate 	 * the addresses are system-wide unique (tcp is in ESTABLISHED
16010Sstevel@tonic-gate 	 * state) and the endpoints may reside in different Zones.
16020Sstevel@tonic-gate 	 */
16030Sstevel@tonic-gate 	zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) ||
16040Sstevel@tonic-gate 	    ipha->ipha_dst == htonl(INADDR_LOOPBACK));
16050Sstevel@tonic-gate 
16060Sstevel@tonic-gate 	bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t));
16070Sstevel@tonic-gate 	bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t));
16080Sstevel@tonic-gate 
16090Sstevel@tonic-gate 	connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, ports)];
16100Sstevel@tonic-gate 
16110Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
16120Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
16130Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
16140Sstevel@tonic-gate 
16150Sstevel@tonic-gate 		if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
16160Sstevel@tonic-gate 		    ipha->ipha_dst, ipha->ipha_src, ports) &&
16170Sstevel@tonic-gate 		    tconnp->conn_tcp->tcp_state >= TCPS_ESTABLISHED &&
16180Sstevel@tonic-gate 		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
16190Sstevel@tonic-gate 
16200Sstevel@tonic-gate 			ASSERT(tconnp != connp);
16210Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
16220Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
16230Sstevel@tonic-gate 			return (tconnp);
16240Sstevel@tonic-gate 		}
16250Sstevel@tonic-gate 	}
16260Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
16270Sstevel@tonic-gate 	return (NULL);
16280Sstevel@tonic-gate }
16290Sstevel@tonic-gate 
16300Sstevel@tonic-gate /*
16310Sstevel@tonic-gate  * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on
16320Sstevel@tonic-gate  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
16330Sstevel@tonic-gate  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
16340Sstevel@tonic-gate  * (peer tcp in at least ESTABLISHED state).
16350Sstevel@tonic-gate  */
16360Sstevel@tonic-gate conn_t *
16370Sstevel@tonic-gate ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcph_t *tcph)
16380Sstevel@tonic-gate {
16390Sstevel@tonic-gate 	uint32_t ports;
16400Sstevel@tonic-gate 	uint16_t *pports = (uint16_t *)&ports;
16410Sstevel@tonic-gate 	connf_t	*connfp;
16420Sstevel@tonic-gate 	conn_t	*tconnp;
16430Sstevel@tonic-gate 	boolean_t zone_chk;
16440Sstevel@tonic-gate 
16450Sstevel@tonic-gate 	/*
16460Sstevel@tonic-gate 	 * If either the source of destination address is loopback, then
16470Sstevel@tonic-gate 	 * both endpoints must be in the same Zone.  Otherwise, both of
16480Sstevel@tonic-gate 	 * the addresses are system-wide unique (tcp is in ESTABLISHED
16490Sstevel@tonic-gate 	 * state) and the endpoints may reside in different Zones.  We
16500Sstevel@tonic-gate 	 * don't do Zone check for link local address(es) because the
16510Sstevel@tonic-gate 	 * current Zone implementation treats each link local address as
16520Sstevel@tonic-gate 	 * being unique per system node, i.e. they belong to global Zone.
16530Sstevel@tonic-gate 	 */
16540Sstevel@tonic-gate 	zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) ||
16550Sstevel@tonic-gate 	    IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst));
16560Sstevel@tonic-gate 
16570Sstevel@tonic-gate 	bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t));
16580Sstevel@tonic-gate 	bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t));
16590Sstevel@tonic-gate 
16600Sstevel@tonic-gate 	connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, ports)];
16610Sstevel@tonic-gate 
16620Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
16630Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
16640Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
16650Sstevel@tonic-gate 
16660Sstevel@tonic-gate 		/* We skip tcp_bound_if check here as this is loopback tcp */
16670Sstevel@tonic-gate 		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
16680Sstevel@tonic-gate 		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
16690Sstevel@tonic-gate 		    tconnp->conn_tcp->tcp_state >= TCPS_ESTABLISHED &&
16700Sstevel@tonic-gate 		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
16710Sstevel@tonic-gate 
16720Sstevel@tonic-gate 			ASSERT(tconnp != connp);
16730Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
16740Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
16750Sstevel@tonic-gate 			return (tconnp);
16760Sstevel@tonic-gate 		}
16770Sstevel@tonic-gate 	}
16780Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
16790Sstevel@tonic-gate 	return (NULL);
16800Sstevel@tonic-gate }
16810Sstevel@tonic-gate 
16820Sstevel@tonic-gate /*
16830Sstevel@tonic-gate  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
16840Sstevel@tonic-gate  * Returns with conn reference held. Caller must call CONN_DEC_REF.
16850Sstevel@tonic-gate  * Only checks for connected entries i.e. no INADDR_ANY checks.
16860Sstevel@tonic-gate  */
16870Sstevel@tonic-gate conn_t *
16880Sstevel@tonic-gate ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcph_t *tcph, int min_state)
16890Sstevel@tonic-gate {
16900Sstevel@tonic-gate 	uint32_t ports;
16910Sstevel@tonic-gate 	uint16_t *pports;
16920Sstevel@tonic-gate 	connf_t	*connfp;
16930Sstevel@tonic-gate 	conn_t	*tconnp;
16940Sstevel@tonic-gate 
16950Sstevel@tonic-gate 	pports = (uint16_t *)&ports;
16960Sstevel@tonic-gate 	bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t));
16970Sstevel@tonic-gate 	bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t));
16980Sstevel@tonic-gate 
16990Sstevel@tonic-gate 	connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, ports)];
17000Sstevel@tonic-gate 
17010Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
17020Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
17030Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
17040Sstevel@tonic-gate 
17050Sstevel@tonic-gate 		if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
17060Sstevel@tonic-gate 		    ipha->ipha_dst, ipha->ipha_src, ports) &&
17070Sstevel@tonic-gate 		    tconnp->conn_tcp->tcp_state >= min_state) {
17080Sstevel@tonic-gate 
17090Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
17100Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
17110Sstevel@tonic-gate 			return (tconnp);
17120Sstevel@tonic-gate 		}
17130Sstevel@tonic-gate 	}
17140Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
17150Sstevel@tonic-gate 	return (NULL);
17160Sstevel@tonic-gate }
17170Sstevel@tonic-gate 
17180Sstevel@tonic-gate /*
17190Sstevel@tonic-gate  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
17200Sstevel@tonic-gate  * Returns with conn reference held. Caller must call CONN_DEC_REF.
17210Sstevel@tonic-gate  * Only checks for connected entries i.e. no INADDR_ANY checks.
17220Sstevel@tonic-gate  * Match on ifindex in addition to addresses.
17230Sstevel@tonic-gate  */
17240Sstevel@tonic-gate conn_t *
17250Sstevel@tonic-gate ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state,
17260Sstevel@tonic-gate     uint_t ifindex)
17270Sstevel@tonic-gate {
17280Sstevel@tonic-gate 	tcp_t	*tcp;
17290Sstevel@tonic-gate 	uint32_t ports;
17300Sstevel@tonic-gate 	uint16_t *pports;
17310Sstevel@tonic-gate 	connf_t	*connfp;
17320Sstevel@tonic-gate 	conn_t	*tconnp;
17330Sstevel@tonic-gate 
17340Sstevel@tonic-gate 	pports = (uint16_t *)&ports;
17350Sstevel@tonic-gate 	pports[0] = tcpha->tha_fport;
17360Sstevel@tonic-gate 	pports[1] = tcpha->tha_lport;
17370Sstevel@tonic-gate 
17380Sstevel@tonic-gate 	connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, ports)];
17390Sstevel@tonic-gate 
17400Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
17410Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
17420Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
17430Sstevel@tonic-gate 
17440Sstevel@tonic-gate 		tcp = tconnp->conn_tcp;
17450Sstevel@tonic-gate 		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
17460Sstevel@tonic-gate 		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
17470Sstevel@tonic-gate 		    tcp->tcp_state >= min_state &&
17480Sstevel@tonic-gate 		    (tcp->tcp_bound_if == 0 ||
17490Sstevel@tonic-gate 		    tcp->tcp_bound_if == ifindex)) {
17500Sstevel@tonic-gate 
17510Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
17520Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
17530Sstevel@tonic-gate 			return (tconnp);
17540Sstevel@tonic-gate 		}
17550Sstevel@tonic-gate 	}
17560Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
17570Sstevel@tonic-gate 	return (NULL);
17580Sstevel@tonic-gate }
17590Sstevel@tonic-gate 
17600Sstevel@tonic-gate /*
17610Sstevel@tonic-gate  * To find a TCP listening connection matching the incoming segment.
17620Sstevel@tonic-gate  */
17630Sstevel@tonic-gate conn_t *
17640Sstevel@tonic-gate ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid)
17650Sstevel@tonic-gate {
17660Sstevel@tonic-gate 	connf_t		*bind_connfp;
17670Sstevel@tonic-gate 	conn_t		*connp;
17680Sstevel@tonic-gate 	tcp_t		*tcp;
17690Sstevel@tonic-gate 
17700Sstevel@tonic-gate 	/*
17710Sstevel@tonic-gate 	 * Avoid false matches for packets sent to an IP destination of
17720Sstevel@tonic-gate 	 * all zeros.
17730Sstevel@tonic-gate 	 */
17740Sstevel@tonic-gate 	if (laddr == 0)
17750Sstevel@tonic-gate 		return (NULL);
17760Sstevel@tonic-gate 
17770Sstevel@tonic-gate 	bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)];
17780Sstevel@tonic-gate 	mutex_enter(&bind_connfp->connf_lock);
17790Sstevel@tonic-gate 	for (connp = bind_connfp->connf_head; connp != NULL;
17800Sstevel@tonic-gate 	    connp = connp->conn_next) {
17810Sstevel@tonic-gate 		tcp = connp->conn_tcp;
17820Sstevel@tonic-gate 		if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) &&
17830Sstevel@tonic-gate 		    connp->conn_zoneid == zoneid &&
17840Sstevel@tonic-gate 		    (tcp->tcp_listener == NULL)) {
17850Sstevel@tonic-gate 			CONN_INC_REF(connp);
17860Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
17870Sstevel@tonic-gate 			return (connp);
17880Sstevel@tonic-gate 		}
17890Sstevel@tonic-gate 	}
17900Sstevel@tonic-gate 	mutex_exit(&bind_connfp->connf_lock);
17910Sstevel@tonic-gate 	return (NULL);
17920Sstevel@tonic-gate }
17930Sstevel@tonic-gate 
17940Sstevel@tonic-gate 
17950Sstevel@tonic-gate conn_t *
17960Sstevel@tonic-gate ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex,
17970Sstevel@tonic-gate     zoneid_t zoneid)
17980Sstevel@tonic-gate {
17990Sstevel@tonic-gate 	connf_t		*bind_connfp;
18000Sstevel@tonic-gate 	conn_t		*connp = NULL;
18010Sstevel@tonic-gate 	tcp_t		*tcp;
18020Sstevel@tonic-gate 
18030Sstevel@tonic-gate 	/*
18040Sstevel@tonic-gate 	 * Avoid false matches for packets sent to an IP destination of
18050Sstevel@tonic-gate 	 * all zeros.
18060Sstevel@tonic-gate 	 */
18070Sstevel@tonic-gate 	if (IN6_IS_ADDR_UNSPECIFIED(laddr))
18080Sstevel@tonic-gate 		return (NULL);
18090Sstevel@tonic-gate 
18100Sstevel@tonic-gate 
18110Sstevel@tonic-gate 	bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)];
18120Sstevel@tonic-gate 	mutex_enter(&bind_connfp->connf_lock);
18130Sstevel@tonic-gate 	for (connp = bind_connfp->connf_head; connp != NULL;
18140Sstevel@tonic-gate 	    connp = connp->conn_next) {
18150Sstevel@tonic-gate 		tcp = connp->conn_tcp;
18160Sstevel@tonic-gate 		if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) &&
18170Sstevel@tonic-gate 		    connp->conn_zoneid == zoneid &&
18180Sstevel@tonic-gate 		    (tcp->tcp_bound_if == 0 ||
18190Sstevel@tonic-gate 		    tcp->tcp_bound_if == ifindex) &&
18200Sstevel@tonic-gate 		    tcp->tcp_listener == NULL) {
18210Sstevel@tonic-gate 			CONN_INC_REF(connp);
18220Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
18230Sstevel@tonic-gate 			return (connp);
18240Sstevel@tonic-gate 		}
18250Sstevel@tonic-gate 	}
18260Sstevel@tonic-gate 	mutex_exit(&bind_connfp->connf_lock);
18270Sstevel@tonic-gate 	return (NULL);
18280Sstevel@tonic-gate }
18290Sstevel@tonic-gate 
18300Sstevel@tonic-gate #ifdef CONN_DEBUG
18310Sstevel@tonic-gate /*
18320Sstevel@tonic-gate  * Trace of the last NBUF refhold/refrele
18330Sstevel@tonic-gate  */
18340Sstevel@tonic-gate int
18350Sstevel@tonic-gate conn_trace_ref(conn_t *connp)
18360Sstevel@tonic-gate {
18370Sstevel@tonic-gate 	int	last;
18380Sstevel@tonic-gate 	conn_trace_t	*ctb;
18390Sstevel@tonic-gate 
18400Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connp->conn_lock));
18410Sstevel@tonic-gate 	last = connp->conn_trace_last;
18420Sstevel@tonic-gate 	last++;
18430Sstevel@tonic-gate 	if (last == CONN_TRACE_MAX)
18440Sstevel@tonic-gate 		last = 0;
18450Sstevel@tonic-gate 
18460Sstevel@tonic-gate 	ctb = &connp->conn_trace_buf[last];
18470Sstevel@tonic-gate 	ctb->ctb_depth = getpcstack(ctb->ctb_stack, IP_STACK_DEPTH);
18480Sstevel@tonic-gate 	connp->conn_trace_last = last;
18490Sstevel@tonic-gate 	return (1);
18500Sstevel@tonic-gate }
18510Sstevel@tonic-gate 
18520Sstevel@tonic-gate int
18530Sstevel@tonic-gate conn_untrace_ref(conn_t *connp)
18540Sstevel@tonic-gate {
18550Sstevel@tonic-gate 	int	last;
18560Sstevel@tonic-gate 	conn_trace_t	*ctb;
18570Sstevel@tonic-gate 
18580Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connp->conn_lock));
18590Sstevel@tonic-gate 	last = connp->conn_trace_last;
18600Sstevel@tonic-gate 	last++;
18610Sstevel@tonic-gate 	if (last == CONN_TRACE_MAX)
18620Sstevel@tonic-gate 		last = 0;
18630Sstevel@tonic-gate 
18640Sstevel@tonic-gate 	ctb = &connp->conn_trace_buf[last];
18650Sstevel@tonic-gate 	ctb->ctb_depth = getpcstack(ctb->ctb_stack, IP_STACK_DEPTH);
18660Sstevel@tonic-gate 	connp->conn_trace_last = last;
18670Sstevel@tonic-gate 	return (1);
18680Sstevel@tonic-gate }
18690Sstevel@tonic-gate #endif
1870