xref: /onnv-gate/usr/src/uts/common/inet/ip/ipclassifier.c (revision 409:22012dc8ea5b)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
50Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
60Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
70Sstevel@tonic-gate  * with the License.
80Sstevel@tonic-gate  *
90Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
100Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
110Sstevel@tonic-gate  * See the License for the specific language governing permissions
120Sstevel@tonic-gate  * and limitations under the License.
130Sstevel@tonic-gate  *
140Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
150Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
160Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
170Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
180Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
190Sstevel@tonic-gate  *
200Sstevel@tonic-gate  * CDDL HEADER END
210Sstevel@tonic-gate  */
220Sstevel@tonic-gate /*
23153Sethindra  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
280Sstevel@tonic-gate 
290Sstevel@tonic-gate const char ipclassifier_version[] = "@(#)ipclassifier.c	1.6	04/03/31 SMI";
300Sstevel@tonic-gate 
310Sstevel@tonic-gate /*
320Sstevel@tonic-gate  * IP PACKET CLASSIFIER
330Sstevel@tonic-gate  *
340Sstevel@tonic-gate  * The IP packet classifier provides mapping between IP packets and persistent
350Sstevel@tonic-gate  * connection state for connection-oriented protocols. It also provides
360Sstevel@tonic-gate  * interface for managing connection states.
370Sstevel@tonic-gate  *
380Sstevel@tonic-gate  * The connection state is kept in conn_t data structure and contains, among
390Sstevel@tonic-gate  * other things:
400Sstevel@tonic-gate  *
410Sstevel@tonic-gate  *	o local/remote address and ports
420Sstevel@tonic-gate  *	o Transport protocol
430Sstevel@tonic-gate  *	o squeue for the connection (for TCP only)
440Sstevel@tonic-gate  *	o reference counter
450Sstevel@tonic-gate  *	o Connection state
460Sstevel@tonic-gate  *	o hash table linkage
470Sstevel@tonic-gate  *	o interface/ire information
480Sstevel@tonic-gate  *	o credentials
490Sstevel@tonic-gate  *	o ipsec policy
500Sstevel@tonic-gate  *	o send and receive functions.
510Sstevel@tonic-gate  *	o mutex lock.
520Sstevel@tonic-gate  *
530Sstevel@tonic-gate  * Connections use a reference counting scheme. They are freed when the
540Sstevel@tonic-gate  * reference counter drops to zero. A reference is incremented when connection
550Sstevel@tonic-gate  * is placed in a list or table, when incoming packet for the connection arrives
560Sstevel@tonic-gate  * and when connection is processed via squeue (squeue processing may be
570Sstevel@tonic-gate  * asynchronous and the reference protects the connection from being destroyed
580Sstevel@tonic-gate  * before its processing is finished).
590Sstevel@tonic-gate  *
600Sstevel@tonic-gate  * send and receive functions are currently used for TCP only. The send function
610Sstevel@tonic-gate  * determines the IP entry point for the packet once it leaves TCP to be sent to
620Sstevel@tonic-gate  * the destination address. The receive function is used by IP when the packet
630Sstevel@tonic-gate  * should be passed for TCP processing. When a new connection is created these
640Sstevel@tonic-gate  * are set to ip_output() and tcp_input() respectively. During the lifetime of
650Sstevel@tonic-gate  * the connection the send and receive functions may change depending on the
 * changes in the connection state. For example, once the connection is bound to
 * an address, the receive function for this connection is set to
680Sstevel@tonic-gate  * tcp_conn_request().  This allows incoming SYNs to go directly into the
690Sstevel@tonic-gate  * listener SYN processing function without going to tcp_input() first.
700Sstevel@tonic-gate  *
710Sstevel@tonic-gate  * Classifier uses several hash tables:
720Sstevel@tonic-gate  *
730Sstevel@tonic-gate  * 	ipcl_conn_fanout:	contains all TCP connections in CONNECTED state
740Sstevel@tonic-gate  *	ipcl_bind_fanout:	contains all connections in BOUND state
750Sstevel@tonic-gate  *	ipcl_proto_fanout:	IPv4 protocol fanout
760Sstevel@tonic-gate  *	ipcl_proto_fanout_v6:	IPv6 protocol fanout
770Sstevel@tonic-gate  *	ipcl_udp_fanout:	contains all UDP connections
780Sstevel@tonic-gate  *	ipcl_globalhash_fanout:	contains all connections
790Sstevel@tonic-gate  *
800Sstevel@tonic-gate  * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
810Sstevel@tonic-gate  * which need to view all existing connections.
820Sstevel@tonic-gate  *
830Sstevel@tonic-gate  * All tables are protected by per-bucket locks. When both per-bucket lock and
840Sstevel@tonic-gate  * connection lock need to be held, the per-bucket lock should be acquired
850Sstevel@tonic-gate  * first, followed by the connection lock.
860Sstevel@tonic-gate  *
870Sstevel@tonic-gate  * All functions doing search in one of these tables increment a reference
880Sstevel@tonic-gate  * counter on the connection found (if any). This reference should be dropped
890Sstevel@tonic-gate  * when the caller has finished processing the connection.
900Sstevel@tonic-gate  *
910Sstevel@tonic-gate  *
920Sstevel@tonic-gate  * INTERFACES:
930Sstevel@tonic-gate  * ===========
940Sstevel@tonic-gate  *
950Sstevel@tonic-gate  * Connection Lookup:
960Sstevel@tonic-gate  * ------------------
970Sstevel@tonic-gate  *
980Sstevel@tonic-gate  * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, zoneid)
990Sstevel@tonic-gate  * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, zoneid)
1000Sstevel@tonic-gate  *
1010Sstevel@tonic-gate  * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
1020Sstevel@tonic-gate  * it can't find any associated connection. If the connection is found, its
1030Sstevel@tonic-gate  * reference counter is incremented.
1040Sstevel@tonic-gate  *
1050Sstevel@tonic-gate  *	mp:	mblock, containing packet header. The full header should fit
1060Sstevel@tonic-gate  *		into a single mblock. It should also contain at least full IP
1070Sstevel@tonic-gate  *		and TCP or UDP header.
1080Sstevel@tonic-gate  *
1090Sstevel@tonic-gate  *	protocol: Either IPPROTO_TCP or IPPROTO_UDP.
1100Sstevel@tonic-gate  *
1110Sstevel@tonic-gate  *	hdr_len: The size of IP header. It is used to find TCP or UDP header in
1120Sstevel@tonic-gate  *		 the packet.
1130Sstevel@tonic-gate  *
1140Sstevel@tonic-gate  * 	zoneid: The zone in which the returned connection must be.
1150Sstevel@tonic-gate  *
1160Sstevel@tonic-gate  *	For TCP connections, the lookup order is as follows:
1170Sstevel@tonic-gate  *		5-tuple {src, dst, protocol, local port, remote port}
1180Sstevel@tonic-gate  *			lookup in ipcl_conn_fanout table.
1190Sstevel@tonic-gate  *		3-tuple {dst, remote port, protocol} lookup in
1200Sstevel@tonic-gate  *			ipcl_bind_fanout table.
1210Sstevel@tonic-gate  *
1220Sstevel@tonic-gate  *	For UDP connections, a 5-tuple {src, dst, protocol, local port,
1230Sstevel@tonic-gate  *	remote port} lookup is done on ipcl_udp_fanout. Note that,
 *	these interfaces do not handle cases where a packet belongs
1250Sstevel@tonic-gate  *	to multiple UDP clients, which is handled in IP itself.
1260Sstevel@tonic-gate  *
1270Sstevel@tonic-gate  * conn_t	*ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcph_t *, int);
1280Sstevel@tonic-gate  * conn_t	*ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t);
1290Sstevel@tonic-gate  *
 *	Lookup routine to find an exact match for {src, dst, local port,
 *	remote port} for TCP connections in ipcl_conn_fanout. The address and
1320Sstevel@tonic-gate  *	ports are read from the IP and TCP header respectively.
1330Sstevel@tonic-gate  *
1340Sstevel@tonic-gate  * conn_t	*ipcl_lookup_listener_v4(lport, laddr, protocol);
1350Sstevel@tonic-gate  * conn_t	*ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex);
1360Sstevel@tonic-gate  *
1370Sstevel@tonic-gate  * 	Lookup routine to find a listener with the tuple {lport, laddr,
1380Sstevel@tonic-gate  * 	protocol} in the ipcl_bind_fanout table. For IPv6, an additional
1390Sstevel@tonic-gate  * 	parameter interface index is also compared.
1400Sstevel@tonic-gate  *
1410Sstevel@tonic-gate  * void ipcl_walk(func, arg)
1420Sstevel@tonic-gate  *
1430Sstevel@tonic-gate  * 	Apply 'func' to every connection available. The 'func' is called as
1440Sstevel@tonic-gate  *	(*func)(connp, arg). The walk is non-atomic so connections may be
1450Sstevel@tonic-gate  *	created and destroyed during the walk. The CONN_CONDEMNED and
1460Sstevel@tonic-gate  *	CONN_INCIPIENT flags ensure that connections which are newly created
1470Sstevel@tonic-gate  *	or being destroyed are not selected by the walker.
1480Sstevel@tonic-gate  *
1490Sstevel@tonic-gate  * Table Updates
1500Sstevel@tonic-gate  * -------------
1510Sstevel@tonic-gate  *
1520Sstevel@tonic-gate  * int ipcl_conn_insert(connp, protocol, src, dst, ports)
1530Sstevel@tonic-gate  * int ipcl_conn_insert_v6(connp, protocol, src, dst, ports, ifindex)
1540Sstevel@tonic-gate  *
1550Sstevel@tonic-gate  *	Insert 'connp' in the ipcl_conn_fanout.
 *	Arguments :
1570Sstevel@tonic-gate  *		connp		conn_t to be inserted
1580Sstevel@tonic-gate  *		protocol	connection protocol
1590Sstevel@tonic-gate  *		src		source address
1600Sstevel@tonic-gate  *		dst		destination address
1610Sstevel@tonic-gate  *		ports		local and remote port
1620Sstevel@tonic-gate  *		ifindex		interface index for IPv6 connections
1630Sstevel@tonic-gate  *
1640Sstevel@tonic-gate  *	Return value :
1650Sstevel@tonic-gate  *		0		if connp was inserted
1660Sstevel@tonic-gate  *		EADDRINUSE	if the connection with the same tuple
1670Sstevel@tonic-gate  *				already exists.
1680Sstevel@tonic-gate  *
1690Sstevel@tonic-gate  * int ipcl_bind_insert(connp, protocol, src, lport);
1700Sstevel@tonic-gate  * int ipcl_bind_insert_v6(connp, protocol, src, lport);
1710Sstevel@tonic-gate  *
1720Sstevel@tonic-gate  * 	Insert 'connp' in ipcl_bind_fanout.
 * 	Arguments :
1740Sstevel@tonic-gate  * 		connp		conn_t to be inserted
1750Sstevel@tonic-gate  * 		protocol	connection protocol
1760Sstevel@tonic-gate  * 		src		source address connection wants
1770Sstevel@tonic-gate  * 				to bind to
1780Sstevel@tonic-gate  * 		lport		local port connection wants to
1790Sstevel@tonic-gate  * 				bind to
1800Sstevel@tonic-gate  *
1810Sstevel@tonic-gate  *
1820Sstevel@tonic-gate  * void ipcl_hash_remove(connp);
1830Sstevel@tonic-gate  *
1840Sstevel@tonic-gate  * 	Removes the 'connp' from the connection fanout table.
1850Sstevel@tonic-gate  *
1860Sstevel@tonic-gate  * Connection Creation/Destruction
1870Sstevel@tonic-gate  * -------------------------------
1880Sstevel@tonic-gate  *
1890Sstevel@tonic-gate  * conn_t *ipcl_conn_create(type, sleep)
1900Sstevel@tonic-gate  *
1910Sstevel@tonic-gate  * 	Creates a new conn based on the type flag, inserts it into
1920Sstevel@tonic-gate  * 	globalhash table.
1930Sstevel@tonic-gate  *
1940Sstevel@tonic-gate  *	type:	This flag determines the type of conn_t which needs to be
1950Sstevel@tonic-gate  *		created.
1960Sstevel@tonic-gate  *		IPCL_TCPCONN	indicates a TCP connection
 *		IPCL_IPCCONN	indicates all non-TCP connections.
1980Sstevel@tonic-gate  *
1990Sstevel@tonic-gate  * void ipcl_conn_destroy(connp)
2000Sstevel@tonic-gate  *
2010Sstevel@tonic-gate  * 	Destroys the connection state, removes it from the global
2020Sstevel@tonic-gate  * 	connection hash table and frees its memory.
2030Sstevel@tonic-gate  */
2040Sstevel@tonic-gate 
2050Sstevel@tonic-gate #include <sys/types.h>
2060Sstevel@tonic-gate #include <sys/stream.h>
2070Sstevel@tonic-gate #include <sys/dlpi.h>
2080Sstevel@tonic-gate #include <sys/stropts.h>
2090Sstevel@tonic-gate #include <sys/sysmacros.h>
2100Sstevel@tonic-gate #include <sys/strsubr.h>
2110Sstevel@tonic-gate #include <sys/strlog.h>
2120Sstevel@tonic-gate #include <sys/strsun.h>
2130Sstevel@tonic-gate #define	_SUN_TPI_VERSION 2
2140Sstevel@tonic-gate #include <sys/ddi.h>
2150Sstevel@tonic-gate #include <sys/cmn_err.h>
2160Sstevel@tonic-gate #include <sys/debug.h>
2170Sstevel@tonic-gate 
2180Sstevel@tonic-gate #include <sys/systm.h>
2190Sstevel@tonic-gate #include <sys/param.h>
2200Sstevel@tonic-gate #include <sys/kmem.h>
2210Sstevel@tonic-gate #include <sys/isa_defs.h>
2220Sstevel@tonic-gate #include <inet/common.h>
2230Sstevel@tonic-gate #include <netinet/ip6.h>
2240Sstevel@tonic-gate #include <netinet/icmp6.h>
2250Sstevel@tonic-gate 
2260Sstevel@tonic-gate #include <inet/ip.h>
2270Sstevel@tonic-gate #include <inet/ip6.h>
2280Sstevel@tonic-gate #include <inet/tcp.h>
2290Sstevel@tonic-gate #include <inet/tcp_trace.h>
2300Sstevel@tonic-gate #include <inet/ip_multi.h>
2310Sstevel@tonic-gate #include <inet/ip_if.h>
2320Sstevel@tonic-gate #include <inet/ip_ire.h>
2330Sstevel@tonic-gate #include <inet/ip_rts.h>
2340Sstevel@tonic-gate #include <inet/optcom.h>
2350Sstevel@tonic-gate #include <inet/ip_ndp.h>
2360Sstevel@tonic-gate #include <inet/sctp_ip.h>
2370Sstevel@tonic-gate 
2380Sstevel@tonic-gate #include <sys/ethernet.h>
2390Sstevel@tonic-gate #include <net/if_types.h>
2400Sstevel@tonic-gate #include <sys/cpuvar.h>
2410Sstevel@tonic-gate 
2420Sstevel@tonic-gate #include <inet/mi.h>
2430Sstevel@tonic-gate #include <inet/ipclassifier.h>
2440Sstevel@tonic-gate #include <inet/ipsec_impl.h>
2450Sstevel@tonic-gate 
/*
 * Classifier debug tracing is compiled in only when the kernel itself is
 * built with DEBUG.
 */
#ifdef DEBUG
#define	IPCL_DEBUG
#else
#undef	IPCL_DEBUG
#endif

/*
 * IPCL_DEBUG_LVL(level, args)
 *
 * In IPCL_DEBUG builds this calls printf with the parenthesized argument
 * list 'args' whenever 'level' has a bit in common with ipcl_debug_level
 * (which starts out as 0, i.e. silent). In other builds it expands to an
 * empty block.
 *
 * NOTE(review): the debug form expands to a bare 'if' statement and uses
 * 'level' without parentheses, so pass a simple constant for 'level' and do
 * not use the macro as the unbraced body of an if/else.
 */
#ifdef	IPCL_DEBUG
int	ipcl_debug_level = 0;
#define	IPCL_DEBUG_LVL(level, args)	\
	if (ipcl_debug_level  & level) { printf args; }
#else
#define	IPCL_DEBUG_LVL(level, args) {; }
#endif
/*
 * Fanout (hash) tables. The pointer tables are allocated by ipcl_init() and
 * released by ipcl_destroy(); the two protocol fanouts are fixed arrays with
 * IPPROTO_MAX + 1 entries each.
 */
connf_t	*ipcl_conn_fanout;
connf_t	*ipcl_bind_fanout;
connf_t	ipcl_proto_fanout[IPPROTO_MAX + 1];
connf_t	ipcl_proto_fanout_v6[IPPROTO_MAX + 1];
connf_t	*ipcl_udp_fanout;

/* A separate hash list for raw socket. */
connf_t *ipcl_raw_fanout;

/*
 * A single connf_t whose lock is set up in ipcl_init().
 * NOTE(review): presumably the list of routing socket clients - confirm
 * against the users of rts_clients.
 */
connf_t rts_clients;

/*
 * Old value for compatibility. ipcl_init() consults it only when
 * ipcl_conn_hash_size is zero.
 */
uint_t tcp_conn_hash_size = 0;

/*
 * New value. Zero means choose automatically: ipcl_init() then derives the
 * size as (freemem * PAGESIZE) / ipcl_conn_hash_memfactor, capped at
 * ipcl_conn_hash_maxsize.
 */
uint_t ipcl_conn_hash_size = 0;
uint_t ipcl_conn_hash_memfactor = 8192;
uint_t ipcl_conn_hash_maxsize = 82500;

/*
 * Number of buckets actually used for ipcl_conn_fanout; computed by
 * ipcl_init() from the tunables above.
 */
uint_t ipcl_conn_fanout_size = 0;


/* bind/udp fanout table size */
uint_t ipcl_bind_fanout_size = 512;
uint_t ipcl_udp_fanout_size = 256;

/* Raw socket fanout size.  Must be a power of 2. */
uint_t ipcl_raw_fanout_size = 256;
2870Sstevel@tonic-gate 
/*
 * Table of primes useful for sizing hash tables, indexed by N for N of 0-28:
 * each non-zero entry is the nearest prime <= 2^N - 2^(N-2). The entries for
 * N of 0-2 are zero, and one extra trailing zero marks the end of the table.
 */

#define	P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,	\
		6143, 12281, 24571, 49139, 98299, 196597, 393209,	\
		786431, 1572853, 3145721, 6291449, 12582893, 25165813,	\
		50331599, 100663291, 201326557, 0}
2970Sstevel@tonic-gate 
/*
 * wrapper structure to ensure that conn+tcpb are aligned
 * on cache lines.
 *
 * The conn_t shares a union with a filler array of CACHE_ALIGN(conn_s)
 * bytes, so the tcp_t that follows starts after that padded region.
 * Objects of this type are what ipcl_tcpconn_cache hands out.
 * NOTE(review): CACHE_ALIGN() is defined elsewhere; it presumably rounds the
 * size of struct conn_s up to a cache line multiple - confirm.
 */
typedef struct itc_s {
	union {
		conn_t	itcu_conn;
		char	itcu_filler[CACHE_ALIGN(conn_s)];
	}	itc_u;
	tcp_t	itc_tcp;
} itc_t;

/* Shorthand for the conn_t embedded at the start of an itc_t. */
#define	itc_conn	itc_u.itcu_conn
3110Sstevel@tonic-gate 
/*
 * kmem caches. ipcl_tcpconn_cache (itc_t objects) and ipcl_conn_cache
 * (plain conn_t objects) are created in ipcl_init(). The extern caches are
 * owned by other modules and are only used here to allocate or return
 * objects.
 */
struct kmem_cache  *ipcl_tcpconn_cache;
struct kmem_cache  *ipcl_tcp_cache;
struct kmem_cache  *ipcl_conn_cache;
extern struct kmem_cache  *sctp_conn_cache;
extern struct kmem_cache  *tcp_sack_info_cache;
extern struct kmem_cache  *tcp_iphc_cache;

/* TCP timer mblk helpers implemented outside this file. */
extern void	tcp_timermp_free(tcp_t *);
extern mblk_t	*tcp_timermp_alloc(int);

/* Constructor/destructor callbacks registered for ipcl_tcpconn_cache. */
static int	ipcl_tcpconn_constructor(void *, void *, int);
static void	ipcl_tcpconn_destructor(void *, void *);

/*
 * Global hash of all connections: CONN_G_HASH_SIZE buckets allocated in
 * ipcl_init().
 * NOTE(review): conn_g_index is presumably the cursor used to spread new
 * conns over those buckets - confirm in ipcl_globalhash_insert().
 */
static int conn_g_index;
connf_t	*ipcl_globalhash_fanout;
3270Sstevel@tonic-gate 
3280Sstevel@tonic-gate #ifdef	IPCL_DEBUG
#define	INET_NTOA_BUFSIZE	18

/*
 * Format the IPv4 address 'in' as dotted decimal into 'b' and return 'b'.
 *
 * The four bytes are printed in the order in which they sit in memory, which
 * yields the conventional text form for an address held in network byte
 * order. The output never exceeds 16 bytes including the terminating NUL,
 * so a buffer of INET_NTOA_BUFSIZE bytes is always large enough.
 */
static char *
inet_ntoa_r(uint32_t in, char *b)
{
	const unsigned char	*octet = (const unsigned char *)&in;

	(void) sprintf(b, "%d.%d.%d.%d",
	    octet[0], octet[1], octet[2], octet[3]);
	return (b);
}
3400Sstevel@tonic-gate #endif
3410Sstevel@tonic-gate 
3420Sstevel@tonic-gate /*
3430Sstevel@tonic-gate  * ipclassifier intialization routine, sets up hash tables and
3440Sstevel@tonic-gate  * conn caches.
3450Sstevel@tonic-gate  */
3460Sstevel@tonic-gate void
3470Sstevel@tonic-gate ipcl_init(void)
3480Sstevel@tonic-gate {
3490Sstevel@tonic-gate 	int i;
3500Sstevel@tonic-gate 	int sizes[] = P2Ps();
3510Sstevel@tonic-gate 
3520Sstevel@tonic-gate 	ipcl_conn_cache = kmem_cache_create("ipcl_conn_cache",
3530Sstevel@tonic-gate 	    sizeof (conn_t), CACHE_ALIGN_SIZE,
3540Sstevel@tonic-gate 	    NULL, NULL,
3550Sstevel@tonic-gate 	    NULL, NULL, NULL, 0);
3560Sstevel@tonic-gate 
3570Sstevel@tonic-gate 	ipcl_tcpconn_cache = kmem_cache_create("ipcl_tcpconn_cache",
3580Sstevel@tonic-gate 	    sizeof (itc_t), CACHE_ALIGN_SIZE,
3590Sstevel@tonic-gate 	    ipcl_tcpconn_constructor, ipcl_tcpconn_destructor,
3600Sstevel@tonic-gate 	    NULL, NULL, NULL, 0);
3610Sstevel@tonic-gate 
3620Sstevel@tonic-gate 	/*
3630Sstevel@tonic-gate 	 * Calculate size of conn fanout table.
3640Sstevel@tonic-gate 	 */
3650Sstevel@tonic-gate 	if (ipcl_conn_hash_size != 0) {
3660Sstevel@tonic-gate 		ipcl_conn_fanout_size = ipcl_conn_hash_size;
3670Sstevel@tonic-gate 	} else if (tcp_conn_hash_size != 0) {
3680Sstevel@tonic-gate 		ipcl_conn_fanout_size = tcp_conn_hash_size;
3690Sstevel@tonic-gate 	} else {
3700Sstevel@tonic-gate 		extern pgcnt_t freemem;
3710Sstevel@tonic-gate 
3720Sstevel@tonic-gate 		ipcl_conn_fanout_size =
3730Sstevel@tonic-gate 		    (freemem * PAGESIZE) / ipcl_conn_hash_memfactor;
3740Sstevel@tonic-gate 
3750Sstevel@tonic-gate 		if (ipcl_conn_fanout_size > ipcl_conn_hash_maxsize)
3760Sstevel@tonic-gate 			ipcl_conn_fanout_size = ipcl_conn_hash_maxsize;
3770Sstevel@tonic-gate 	}
3780Sstevel@tonic-gate 
3790Sstevel@tonic-gate 	for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) {
3800Sstevel@tonic-gate 		if (sizes[i] >= ipcl_conn_fanout_size) {
3810Sstevel@tonic-gate 			break;
3820Sstevel@tonic-gate 		}
3830Sstevel@tonic-gate 	}
3840Sstevel@tonic-gate 	if ((ipcl_conn_fanout_size = sizes[i]) == 0) {
3850Sstevel@tonic-gate 		/* Out of range, use the 2^16 value */
3860Sstevel@tonic-gate 		ipcl_conn_fanout_size = sizes[16];
3870Sstevel@tonic-gate 	}
3880Sstevel@tonic-gate 	ipcl_conn_fanout = (connf_t *)kmem_zalloc(ipcl_conn_fanout_size *
3890Sstevel@tonic-gate 	    sizeof (*ipcl_conn_fanout), KM_SLEEP);
3900Sstevel@tonic-gate 
3910Sstevel@tonic-gate 	for (i = 0; i < ipcl_conn_fanout_size; i++) {
3920Sstevel@tonic-gate 		mutex_init(&ipcl_conn_fanout[i].connf_lock, NULL,
3930Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
3940Sstevel@tonic-gate 	}
3950Sstevel@tonic-gate 
3960Sstevel@tonic-gate 	ipcl_bind_fanout = (connf_t *)kmem_zalloc(ipcl_bind_fanout_size *
3970Sstevel@tonic-gate 	    sizeof (*ipcl_bind_fanout), KM_SLEEP);
3980Sstevel@tonic-gate 
3990Sstevel@tonic-gate 	for (i = 0; i < ipcl_bind_fanout_size; i++) {
4000Sstevel@tonic-gate 		mutex_init(&ipcl_bind_fanout[i].connf_lock, NULL,
4010Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4020Sstevel@tonic-gate 	}
4030Sstevel@tonic-gate 
4040Sstevel@tonic-gate 	for (i = 0; i < A_CNT(ipcl_proto_fanout); i++) {
4050Sstevel@tonic-gate 		mutex_init(&ipcl_proto_fanout[i].connf_lock, NULL,
4060Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4070Sstevel@tonic-gate 	}
4080Sstevel@tonic-gate 	for (i = 0; i < A_CNT(ipcl_proto_fanout_v6); i++) {
4090Sstevel@tonic-gate 		mutex_init(&ipcl_proto_fanout_v6[i].connf_lock, NULL,
4100Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4110Sstevel@tonic-gate 	}
4120Sstevel@tonic-gate 
4130Sstevel@tonic-gate 	mutex_init(&rts_clients.connf_lock, NULL, MUTEX_DEFAULT, NULL);
4140Sstevel@tonic-gate 
4150Sstevel@tonic-gate 	ipcl_udp_fanout = (connf_t *)kmem_zalloc(ipcl_udp_fanout_size *
4160Sstevel@tonic-gate 	    sizeof (*ipcl_udp_fanout), KM_SLEEP);
4170Sstevel@tonic-gate 
4180Sstevel@tonic-gate 	for (i = 0; i < ipcl_udp_fanout_size; i++) {
4190Sstevel@tonic-gate 		mutex_init(&ipcl_udp_fanout[i].connf_lock, NULL,
4200Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4210Sstevel@tonic-gate 	}
4220Sstevel@tonic-gate 
4230Sstevel@tonic-gate 	ipcl_raw_fanout = (connf_t *)kmem_zalloc(ipcl_raw_fanout_size *
4240Sstevel@tonic-gate 	    sizeof (*ipcl_raw_fanout), KM_SLEEP);
4250Sstevel@tonic-gate 
4260Sstevel@tonic-gate 	for (i = 0; i < ipcl_raw_fanout_size; i++) {
4270Sstevel@tonic-gate 		mutex_init(&ipcl_raw_fanout[i].connf_lock, NULL,
4280Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4290Sstevel@tonic-gate 	}
4300Sstevel@tonic-gate 
4310Sstevel@tonic-gate 	ipcl_globalhash_fanout = (connf_t *)kmem_zalloc(sizeof (connf_t) *
4320Sstevel@tonic-gate 	    CONN_G_HASH_SIZE, KM_SLEEP);
4330Sstevel@tonic-gate 
4340Sstevel@tonic-gate 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
4350Sstevel@tonic-gate 		mutex_init(&ipcl_globalhash_fanout[i].connf_lock, NULL,
4360Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4370Sstevel@tonic-gate 	}
4380Sstevel@tonic-gate }
4390Sstevel@tonic-gate 
4400Sstevel@tonic-gate void
4410Sstevel@tonic-gate ipcl_destroy(void)
4420Sstevel@tonic-gate {
4430Sstevel@tonic-gate 	int i;
4440Sstevel@tonic-gate 	kmem_cache_destroy(ipcl_conn_cache);
4450Sstevel@tonic-gate 	kmem_cache_destroy(ipcl_tcpconn_cache);
4460Sstevel@tonic-gate 	for (i = 0; i < ipcl_conn_fanout_size; i++)
4470Sstevel@tonic-gate 		mutex_destroy(&ipcl_conn_fanout[i].connf_lock);
4480Sstevel@tonic-gate 	kmem_free(ipcl_conn_fanout, ipcl_conn_fanout_size *
4490Sstevel@tonic-gate 	    sizeof (*ipcl_conn_fanout));
4500Sstevel@tonic-gate 	for (i = 0; i < ipcl_bind_fanout_size; i++)
4510Sstevel@tonic-gate 		mutex_destroy(&ipcl_bind_fanout[i].connf_lock);
4520Sstevel@tonic-gate 	kmem_free(ipcl_bind_fanout, ipcl_bind_fanout_size *
4530Sstevel@tonic-gate 	    sizeof (*ipcl_bind_fanout));
4540Sstevel@tonic-gate 
4550Sstevel@tonic-gate 	for (i = 0; i < A_CNT(ipcl_proto_fanout); i++)
4560Sstevel@tonic-gate 		mutex_destroy(&ipcl_proto_fanout[i].connf_lock);
4570Sstevel@tonic-gate 	for (i = 0; i < A_CNT(ipcl_proto_fanout_v6); i++)
4580Sstevel@tonic-gate 		mutex_destroy(&ipcl_proto_fanout_v6[i].connf_lock);
4590Sstevel@tonic-gate 
4600Sstevel@tonic-gate 	for (i = 0; i < ipcl_udp_fanout_size; i++)
4610Sstevel@tonic-gate 		mutex_destroy(&ipcl_udp_fanout[i].connf_lock);
4620Sstevel@tonic-gate 	kmem_free(ipcl_udp_fanout, ipcl_udp_fanout_size *
4630Sstevel@tonic-gate 	    sizeof (*ipcl_udp_fanout));
4640Sstevel@tonic-gate 
4650Sstevel@tonic-gate 	for (i = 0; i < ipcl_raw_fanout_size; i++)
4660Sstevel@tonic-gate 		mutex_destroy(&ipcl_raw_fanout[i].connf_lock);
4670Sstevel@tonic-gate 	kmem_free(ipcl_raw_fanout, ipcl_raw_fanout_size *
4680Sstevel@tonic-gate 	    sizeof (*ipcl_raw_fanout));
4690Sstevel@tonic-gate 
4700Sstevel@tonic-gate 	kmem_free(ipcl_globalhash_fanout, sizeof (connf_t) * CONN_G_HASH_SIZE);
4710Sstevel@tonic-gate 	mutex_destroy(&rts_clients.connf_lock);
4720Sstevel@tonic-gate }
4730Sstevel@tonic-gate 
4740Sstevel@tonic-gate /*
4750Sstevel@tonic-gate  * conn creation routine. initialize the conn, sets the reference
4760Sstevel@tonic-gate  * and inserts it in the global hash table.
4770Sstevel@tonic-gate  */
4780Sstevel@tonic-gate conn_t *
4790Sstevel@tonic-gate ipcl_conn_create(uint32_t type, int sleep)
4800Sstevel@tonic-gate {
4810Sstevel@tonic-gate 	itc_t	*itc;
4820Sstevel@tonic-gate 	conn_t	*connp;
4830Sstevel@tonic-gate 
4840Sstevel@tonic-gate 	switch (type) {
4850Sstevel@tonic-gate 	case IPCL_TCPCONN:
4860Sstevel@tonic-gate 		if ((itc = kmem_cache_alloc(ipcl_tcpconn_cache,
4870Sstevel@tonic-gate 		    sleep)) == NULL)
4880Sstevel@tonic-gate 			return (NULL);
4890Sstevel@tonic-gate 		connp = &itc->itc_conn;
4900Sstevel@tonic-gate 		connp->conn_ref = 1;
4910Sstevel@tonic-gate 		IPCL_DEBUG_LVL(1,
4920Sstevel@tonic-gate 		    ("ipcl_conn_create: connp = %p tcp (%p)",
4930Sstevel@tonic-gate 		    (void *)connp, (void *)connp->conn_tcp));
4940Sstevel@tonic-gate 		ipcl_globalhash_insert(connp);
4950Sstevel@tonic-gate 		break;
4960Sstevel@tonic-gate 	case IPCL_SCTPCONN:
4970Sstevel@tonic-gate 		if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL)
4980Sstevel@tonic-gate 			return (NULL);
4990Sstevel@tonic-gate 		connp->conn_flags = IPCL_SCTPCONN;
5000Sstevel@tonic-gate 		break;
5010Sstevel@tonic-gate 	case IPCL_IPCCONN:
5020Sstevel@tonic-gate 		connp = kmem_cache_alloc(ipcl_conn_cache, sleep);
5030Sstevel@tonic-gate 		if (connp == NULL)
5040Sstevel@tonic-gate 			return (connp);
5050Sstevel@tonic-gate 		bzero(connp, sizeof (conn_t));
5060Sstevel@tonic-gate 		mutex_init(&connp->conn_lock, NULL,
5070Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
5080Sstevel@tonic-gate 		cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
5090Sstevel@tonic-gate 		connp->conn_flags |= IPCL_IPCCONN;
5100Sstevel@tonic-gate 		connp->conn_ref = 1;
5110Sstevel@tonic-gate 		IPCL_DEBUG_LVL(1,
5120Sstevel@tonic-gate 		    ("ipcl_conn_create: connp = %p\n", (void *)connp));
5130Sstevel@tonic-gate 		ipcl_globalhash_insert(connp);
5140Sstevel@tonic-gate 		break;
5150Sstevel@tonic-gate 	}
5160Sstevel@tonic-gate 
5170Sstevel@tonic-gate 	return (connp);
5180Sstevel@tonic-gate }
5190Sstevel@tonic-gate 
/*
 * Final teardown of a conn_t. The caller must not hold conn_lock, the
 * reference count must already be zero and no ire may still be cached on
 * the conn (all three are asserted).
 *
 * The conn is removed from the global hash and its condition variable is
 * destroyed. What happens next depends on the conn type:
 *
 *	IPCL_TCPCONN	the TCP state is released, the whole itc_t is zeroed
 *			and, with a few fields restored, returned to
 *			ipcl_tcpconn_cache.
 *	IPCL_SCTPCONN	the conn is handed to sctp_free().
 *	otherwise	conn_lock is destroyed and the conn is returned to
 *			ipcl_conn_cache.
 */
void
ipcl_conn_destroy(conn_t *connp)
{
	mblk_t	*mp;
	tcp_t	*tcp = connp->conn_tcp;

	ASSERT(!MUTEX_HELD(&connp->conn_lock));
	ASSERT(connp->conn_ref == 0);
	ASSERT(connp->conn_ire_cache == NULL);

	ipcl_globalhash_remove(connp);

	cv_destroy(&connp->conn_cv);
	if (connp->conn_flags & IPCL_TCPCONN) {
		mutex_destroy(&connp->conn_lock);
		ASSERT(connp->conn_tcp != NULL);
		tcp_free(tcp);
		/* Remember the timer mblk; it is put back after the bzero. */
		mp = tcp->tcp_timercache;

		if (tcp->tcp_sack_info != NULL) {
			/* Objects go back to their cache zeroed. */
			bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t));
			kmem_cache_free(tcp_sack_info_cache,
			    tcp->tcp_sack_info);
		}
		if (tcp->tcp_iphc != NULL) {
			/*
			 * tcp_hdr_grown selects how the header buffer is
			 * released: kmem_free() when set, otherwise it is
			 * zeroed and returned to tcp_iphc_cache.
			 */
			if (tcp->tcp_hdr_grown) {
				kmem_free(tcp->tcp_iphc, tcp->tcp_iphc_len);
			} else {
				bzero(tcp->tcp_iphc, tcp->tcp_iphc_len);
				kmem_cache_free(tcp_iphc_cache, tcp->tcp_iphc);
			}
			tcp->tcp_iphc_len = 0;
		}
		ASSERT(tcp->tcp_iphc_len == 0);

		/* Drop the IPsec latch and policy references, if any. */
		if (connp->conn_latch != NULL)
			IPLATCH_REFRELE(connp->conn_latch);
		if (connp->conn_policy != NULL)
			IPPH_REFRELE(connp->conn_policy);
		/*
		 * Wipe the conn_t together with the tcp_t that follows it
		 * in the itc_t.
		 */
		bzero(connp, sizeof (itc_t));

		/*
		 * Restore the fields that are kept across the wipe before
		 * the object goes back to the cache.
		 * NOTE(review): presumably this mirrors the state set up by
		 * ipcl_tcpconn_constructor() - confirm.
		 */
		tcp->tcp_timercache = mp;
		connp->conn_tcp = tcp;
		connp->conn_flags = IPCL_TCPCONN;
		connp->conn_ulp = IPPROTO_TCP;
		tcp->tcp_connp = connp;
		kmem_cache_free(ipcl_tcpconn_cache, connp);
	} else if (connp->conn_flags & IPCL_SCTPCONN) {
		sctp_free(connp);
	} else {
		mutex_destroy(&connp->conn_lock);
		kmem_cache_free(ipcl_conn_cache, connp);
	}
}
5740Sstevel@tonic-gate 
5750Sstevel@tonic-gate /*
5760Sstevel@tonic-gate  * Running in cluster mode - deregister listener information
5770Sstevel@tonic-gate  */
5780Sstevel@tonic-gate 
5790Sstevel@tonic-gate static void
5800Sstevel@tonic-gate ipcl_conn_unlisten(conn_t *connp)
5810Sstevel@tonic-gate {
5820Sstevel@tonic-gate 	ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0);
5830Sstevel@tonic-gate 	ASSERT(connp->conn_lport != 0);
5840Sstevel@tonic-gate 
5850Sstevel@tonic-gate 	if (cl_inet_unlisten != NULL) {
5860Sstevel@tonic-gate 		sa_family_t	addr_family;
5870Sstevel@tonic-gate 		uint8_t		*laddrp;
5880Sstevel@tonic-gate 
5890Sstevel@tonic-gate 		if (connp->conn_pkt_isv6) {
5900Sstevel@tonic-gate 			addr_family = AF_INET6;
5910Sstevel@tonic-gate 			laddrp = (uint8_t *)&connp->conn_bound_source_v6;
5920Sstevel@tonic-gate 		} else {
5930Sstevel@tonic-gate 			addr_family = AF_INET;
5940Sstevel@tonic-gate 			laddrp = (uint8_t *)&connp->conn_bound_source;
5950Sstevel@tonic-gate 		}
5960Sstevel@tonic-gate 		(*cl_inet_unlisten)(IPPROTO_TCP, addr_family, laddrp,
5970Sstevel@tonic-gate 		    connp->conn_lport);
5980Sstevel@tonic-gate 	}
5990Sstevel@tonic-gate 	connp->conn_flags &= ~IPCL_CL_LISTENER;
6000Sstevel@tonic-gate }
6010Sstevel@tonic-gate 
/*
 * IPCL_HASH_REMOVE(connp)
 *
 * Unlink 'connp' from the fanout bucket recorded in conn_fanout. Nothing is
 * done when conn_fanout is NULL, i.e. the conn is not in a bucket. The
 * caller must not hold conn_lock (asserted).
 *
 * With the bucket's connf_lock held, the macro:
 *	- splices the conn out of the doubly linked bucket list, updating
 *	  connf_head when the conn was the first entry;
 *	- clears conn_fanout, conn_next and conn_prev;
 *	- sets IPCL_REMOVED;
 *	- calls ipcl_conn_unlisten() if IPCL_CL_LISTENER is set;
 *	- drops one reference with CONN_DEC_REF (presumably the one held on
 *	  behalf of the table - confirm against the insert macros).
 *
 * We set the IPCL_REMOVED flag (instead of clearing the flag indicating
 * which table the conn belonged to). So for debugging we can see which hash
 * table this connection was in.
 *
 * The macro expands to a braced block that declares a local named 'connfp'
 * and evaluates 'connp' several times, so pass a plain pointer variable.
 */
#define	IPCL_HASH_REMOVE(connp)	{					\
	connf_t	*connfp = (connp)->conn_fanout;				\
	ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));			\
	if (connfp != NULL) {						\
		IPCL_DEBUG_LVL(4, ("IPCL_HASH_REMOVE: connp %p",	\
		    (void *)(connp)));					\
		mutex_enter(&connfp->connf_lock);			\
		if ((connp)->conn_next != NULL)				\
			(connp)->conn_next->conn_prev =			\
			    (connp)->conn_prev;				\
		if ((connp)->conn_prev != NULL)				\
			(connp)->conn_prev->conn_next =			\
			    (connp)->conn_next;				\
		else							\
			connfp->connf_head = (connp)->conn_next;	\
		(connp)->conn_fanout = NULL;				\
		(connp)->conn_next = NULL;				\
		(connp)->conn_prev = NULL;				\
		(connp)->conn_flags |= IPCL_REMOVED;			\
		if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0)	\
			ipcl_conn_unlisten((connp));			\
		CONN_DEC_REF((connp));					\
		mutex_exit(&connfp->connf_lock);			\
	}								\
}
6320Sstevel@tonic-gate 
/*
 * Function form of the IPCL_HASH_REMOVE() macro: removes connp from the
 * fanout bucket recorded in its conn_fanout, if it is on one.  The macro
 * ASSERTs that conn_lock is not held on entry.
 */
6330Sstevel@tonic-gate void
6340Sstevel@tonic-gate ipcl_hash_remove(conn_t *connp)
6350Sstevel@tonic-gate {
6360Sstevel@tonic-gate 	IPCL_HASH_REMOVE(connp);
6370Sstevel@tonic-gate }
6380Sstevel@tonic-gate 
6390Sstevel@tonic-gate /*
6400Sstevel@tonic-gate  * The whole purpose of this function is allow removal of
6410Sstevel@tonic-gate  * a conn_t from the connected hash for timewait reclaim.
6420Sstevel@tonic-gate  * This is essentially a TW reclaim fastpath where timewait
6430Sstevel@tonic-gate  * collector checks under fanout lock (so no one else can
6440Sstevel@tonic-gate  * get access to the conn_t) that refcnt is 2 i.e. one for
6450Sstevel@tonic-gate  * TCP and one for the classifier hash list. If ref count
6460Sstevel@tonic-gate  * is indeed 2, we can just remove the conn under lock and
6470Sstevel@tonic-gate  * avoid cleaning up the conn under squeue. This gives us
6480Sstevel@tonic-gate  * improved performance.
6490Sstevel@tonic-gate  */
6500Sstevel@tonic-gate void
6510Sstevel@tonic-gate ipcl_hash_remove_locked(conn_t *connp, connf_t	*connfp)
6520Sstevel@tonic-gate {
6530Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connfp->connf_lock));
6540Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connp->conn_lock));
6550Sstevel@tonic-gate 	ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0);
6560Sstevel@tonic-gate 
6570Sstevel@tonic-gate 	if ((connp)->conn_next != NULL) {
6580Sstevel@tonic-gate 		(connp)->conn_next->conn_prev =
6590Sstevel@tonic-gate 			(connp)->conn_prev;
6600Sstevel@tonic-gate 	}
6610Sstevel@tonic-gate 	if ((connp)->conn_prev != NULL) {
6620Sstevel@tonic-gate 		(connp)->conn_prev->conn_next =
6630Sstevel@tonic-gate 			(connp)->conn_next;
6640Sstevel@tonic-gate 	} else {
6650Sstevel@tonic-gate 		connfp->connf_head = (connp)->conn_next;
6660Sstevel@tonic-gate 	}
6670Sstevel@tonic-gate 	(connp)->conn_fanout = NULL;
6680Sstevel@tonic-gate 	(connp)->conn_next = NULL;
6690Sstevel@tonic-gate 	(connp)->conn_prev = NULL;
6700Sstevel@tonic-gate 	(connp)->conn_flags |= IPCL_REMOVED;
6710Sstevel@tonic-gate 	ASSERT((connp)->conn_ref == 2);
6720Sstevel@tonic-gate 	(connp)->conn_ref--;
6730Sstevel@tonic-gate }
6740Sstevel@tonic-gate 
/*
 * Link a conn at the head of a fanout bucket.  This macro takes no lock
 * itself; the visible users wrap it in (connfp)->connf_lock.
 *
 * The conn must not be on any bucket (conn_fanout, conn_next and
 * conn_prev all NULL; ASSERTed).  IPCL_REMOVED is cleared, IPCL_CONNECTED
 * is set and a reference is taken with CONN_INC_REF().
 */
#define	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) {		\
	ASSERT((connp)->conn_fanout == NULL);				\
	ASSERT((connp)->conn_next == NULL);				\
	ASSERT((connp)->conn_prev == NULL);				\
	if ((connfp)->connf_head != NULL) {				\
		(connp)->conn_next = (connfp)->connf_head;		\
		(connp)->conn_next->conn_prev = (connp);		\
	}								\
	(connfp)->connf_head = (connp);					\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = IPCL_CONNECTED |				\
	    ((connp)->conn_flags & ~IPCL_REMOVED);			\
	CONN_INC_REF(connp);						\
}
6890Sstevel@tonic-gate 
/*
 * Insert a conn at the head of the given fanout bucket as a connected
 * entry.  The conn is first removed from whatever bucket it is currently
 * on (IPCL_HASH_REMOVE), then linked in with
 * IPCL_HASH_INSERT_CONNECTED_LOCKED while (connfp)->connf_lock is held.
 * Note that the removal and the insertion are done under separate lock
 * acquisitions.
 */
6900Sstevel@tonic-gate #define	IPCL_HASH_INSERT_CONNECTED(connfp, connp) {			\
6910Sstevel@tonic-gate 	IPCL_DEBUG_LVL(8, ("IPCL_HASH_INSERT_CONNECTED: connfp %p "	\
6920Sstevel@tonic-gate 	    "connp %p", (void *)(connfp), (void *)(connp)));		\
6930Sstevel@tonic-gate 	IPCL_HASH_REMOVE((connp));					\
6940Sstevel@tonic-gate 	mutex_enter(&(connfp)->connf_lock);				\
6950Sstevel@tonic-gate 	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);		\
6960Sstevel@tonic-gate 	mutex_exit(&(connfp)->connf_lock);				\
6970Sstevel@tonic-gate }
6980Sstevel@tonic-gate 
/*
 * Insert a conn into a fanout bucket as a bound entry.
 *
 * The conn is first removed from whatever bucket it is currently on
 * (IPCL_HASH_REMOVE).  Then, under (connfp)->connf_lock, the list is
 * walked to the first entry whose conn_srcv6 satisfies
 * _IPCL_V4_MATCH_ANY() and the conn is linked in immediately before it,
 * or at the tail if no entry matches.  IPCL_REMOVED is cleared,
 * IPCL_BOUND is set and a reference is taken with CONN_INC_REF().
 *
 * Every use of the macro arguments is parenthesized, including the debug
 * cast of connfp, so that expression arguments expand safely.
 */
#define	IPCL_HASH_INSERT_BOUND(connfp, connp) {				\
	conn_t *pconnp = NULL, *nconnp;					\
	IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_BOUND: connfp %p "	\
	    "connp %p", (void *)(connfp), (void *)(connp)));		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	nconnp = (connfp)->connf_head;					\
	while (nconnp != NULL &&					\
	    !_IPCL_V4_MATCH_ANY(nconnp->conn_srcv6)) {			\
		pconnp = nconnp;					\
		nconnp = nconnp->conn_next;				\
	}								\
	if (pconnp != NULL) {						\
		pconnp->conn_next = (connp);				\
		(connp)->conn_prev = pconnp;				\
	} else {							\
		(connfp)->connf_head = (connp);				\
	}								\
	if (nconnp != NULL) {						\
		(connp)->conn_next = nconnp;				\
		nconnp->conn_prev = (connp);				\
	}								\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF((connp));						\
	mutex_exit(&(connfp)->connf_lock);				\
}
7270Sstevel@tonic-gate 
/*
 * Insert a conn into a fanout bucket as a wildcard entry.
 *
 * The conn is first removed from whatever bucket it is currently on
 * (IPCL_HASH_REMOVE).  Then, under (connfp)->connf_lock, the list is
 * walked from the head:
 *
 *  - if the new conn's conn_srcv6 is IPv4-mapped and an entry in the same
 *    zone with an unspecified conn_srcv6 is found, the new conn is linked
 *    in immediately before that entry;
 *  - otherwise the new conn is appended at the tail.
 *
 * During the walk "list" points at the link that leads to "next" and
 * "prev" is the entry owning that link (NULL for the bucket head), so
 * prev is already the correct conn_prev for the inserted conn in both
 * cases and needs no adjustment at the insertion point.
 *
 * IPCL_REMOVED is cleared, IPCL_BOUND is set and a reference is taken
 * with CONN_INC_REF().  Every use of the macro arguments is
 * parenthesized so that expression arguments expand safely.
 */
#define	IPCL_HASH_INSERT_WILDCARD(connfp, connp) {			\
	conn_t **list, *prev, *next;					\
	boolean_t isv4mapped =						\
	    IN6_IS_ADDR_V4MAPPED(&(connp)->conn_srcv6);			\
	IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_WILDCARD: connfp %p "	\
	    "connp %p", (void *)(connfp), (void *)(connp)));		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	list = &(connfp)->connf_head;					\
	prev = NULL;							\
	while ((next = *list) != NULL) {				\
		if (isv4mapped &&					\
		    IN6_IS_ADDR_UNSPECIFIED(&next->conn_srcv6) &&	\
		    (connp)->conn_zoneid == next->conn_zoneid) {	\
			(connp)->conn_next = next;			\
			next->conn_prev = (connp);			\
			break;						\
		}							\
		list = &next->conn_next;				\
		prev = next;						\
	}								\
	(connp)->conn_prev = prev;					\
	*list = (connp);						\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF((connp));						\
	mutex_exit(&(connfp)->connf_lock);				\
}
7590Sstevel@tonic-gate 
/*
 * Function form of the IPCL_HASH_INSERT_WILDCARD() macro: removes connp
 * from any bucket it is currently on and inserts it into connfp as a
 * wildcard entry, taking connfp->connf_lock for the insertion.
 */
7600Sstevel@tonic-gate void
7610Sstevel@tonic-gate ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp)
7620Sstevel@tonic-gate {
7630Sstevel@tonic-gate 	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
7640Sstevel@tonic-gate }
7650Sstevel@tonic-gate 
7660Sstevel@tonic-gate void
7670Sstevel@tonic-gate ipcl_proto_insert(conn_t *connp, uint8_t protocol)
7680Sstevel@tonic-gate {
7690Sstevel@tonic-gate 	connf_t	*connfp;
7700Sstevel@tonic-gate 
7710Sstevel@tonic-gate 	ASSERT(connp != NULL);
7720Sstevel@tonic-gate 
7730Sstevel@tonic-gate 	connp->conn_ulp = protocol;
7740Sstevel@tonic-gate 
7750Sstevel@tonic-gate 	/* Insert it in the protocol hash */
7760Sstevel@tonic-gate 	connfp = &ipcl_proto_fanout[protocol];
7770Sstevel@tonic-gate 	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
7780Sstevel@tonic-gate }
7790Sstevel@tonic-gate 
7800Sstevel@tonic-gate void
7810Sstevel@tonic-gate ipcl_proto_insert_v6(conn_t *connp, uint8_t protocol)
7820Sstevel@tonic-gate {
7830Sstevel@tonic-gate 	connf_t	*connfp;
7840Sstevel@tonic-gate 
7850Sstevel@tonic-gate 	ASSERT(connp != NULL);
7860Sstevel@tonic-gate 
7870Sstevel@tonic-gate 	connp->conn_ulp = protocol;
7880Sstevel@tonic-gate 
7890Sstevel@tonic-gate 	/* Insert it in the Bind Hash */
7900Sstevel@tonic-gate 	connfp = &ipcl_proto_fanout_v6[protocol];
7910Sstevel@tonic-gate 	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
7920Sstevel@tonic-gate }
7930Sstevel@tonic-gate 
7940Sstevel@tonic-gate /*
7950Sstevel@tonic-gate  * This function is used only for inserting SCTP raw socket now.
7960Sstevel@tonic-gate  * This may change later.
7970Sstevel@tonic-gate  *
7980Sstevel@tonic-gate  * Note that only one raw socket can be bound to a port.  The param
7990Sstevel@tonic-gate  * lport is in network byte order.
8000Sstevel@tonic-gate  */
8010Sstevel@tonic-gate static int
8020Sstevel@tonic-gate ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport)
8030Sstevel@tonic-gate {
8040Sstevel@tonic-gate 	connf_t	*connfp;
8050Sstevel@tonic-gate 	conn_t	*oconnp;
8060Sstevel@tonic-gate 
8070Sstevel@tonic-gate 	connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport))];
8080Sstevel@tonic-gate 
8090Sstevel@tonic-gate 	/* Check for existing raw socket already bound to the port. */
8100Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
8110Sstevel@tonic-gate 	for (oconnp = connfp->connf_head; oconnp != NULL;
812*409Skcpoon 	    oconnp = oconnp->conn_next) {
8130Sstevel@tonic-gate 		if (oconnp->conn_lport == lport &&
8140Sstevel@tonic-gate 		    oconnp->conn_zoneid == connp->conn_zoneid &&
8150Sstevel@tonic-gate 		    oconnp->conn_af_isv6 == connp->conn_af_isv6 &&
8160Sstevel@tonic-gate 		    ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) ||
8170Sstevel@tonic-gate 		    IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_srcv6) ||
8180Sstevel@tonic-gate 		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6) ||
8190Sstevel@tonic-gate 		    IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_srcv6)) ||
8200Sstevel@tonic-gate 		    IN6_ARE_ADDR_EQUAL(&oconnp->conn_srcv6,
8210Sstevel@tonic-gate 		    &connp->conn_srcv6))) {
8220Sstevel@tonic-gate 			break;
8230Sstevel@tonic-gate 		}
8240Sstevel@tonic-gate 	}
8250Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
8260Sstevel@tonic-gate 	if (oconnp != NULL)
8270Sstevel@tonic-gate 		return (EADDRNOTAVAIL);
8280Sstevel@tonic-gate 
8290Sstevel@tonic-gate 	if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) ||
8300Sstevel@tonic-gate 	    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_remv6)) {
8310Sstevel@tonic-gate 		if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) ||
8320Sstevel@tonic-gate 		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6)) {
8330Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
8340Sstevel@tonic-gate 		} else {
8350Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
8360Sstevel@tonic-gate 		}
8370Sstevel@tonic-gate 	} else {
8380Sstevel@tonic-gate 		IPCL_HASH_INSERT_CONNECTED(connfp, connp);
8390Sstevel@tonic-gate 	}
8400Sstevel@tonic-gate 	return (0);
8410Sstevel@tonic-gate }
8420Sstevel@tonic-gate 
8430Sstevel@tonic-gate /*
8440Sstevel@tonic-gate  * (v4, v6) bind hash insertion routines
8450Sstevel@tonic-gate  */
8460Sstevel@tonic-gate int
8470Sstevel@tonic-gate ipcl_bind_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, uint16_t lport)
8480Sstevel@tonic-gate {
8490Sstevel@tonic-gate 	connf_t	*connfp;
8500Sstevel@tonic-gate #ifdef	IPCL_DEBUG
8510Sstevel@tonic-gate 	char	buf[INET_NTOA_BUFSIZE];
8520Sstevel@tonic-gate #endif
8530Sstevel@tonic-gate 	int	ret = 0;
8540Sstevel@tonic-gate 
8550Sstevel@tonic-gate 	ASSERT(connp);
8560Sstevel@tonic-gate 
8570Sstevel@tonic-gate 	IPCL_DEBUG_LVL(64, ("ipcl_bind_insert: connp %p, src = %s, "
8580Sstevel@tonic-gate 	    "port = %d\n", (void *)connp, inet_ntoa_r(src, buf), lport));
8590Sstevel@tonic-gate 
8600Sstevel@tonic-gate 	connp->conn_ulp = protocol;
8610Sstevel@tonic-gate 	IN6_IPADDR_TO_V4MAPPED(src, &connp->conn_srcv6);
8620Sstevel@tonic-gate 	connp->conn_lport = lport;
8630Sstevel@tonic-gate 
8640Sstevel@tonic-gate 	switch (protocol) {
8650Sstevel@tonic-gate 	case IPPROTO_UDP:
8660Sstevel@tonic-gate 	default:
8670Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
8680Sstevel@tonic-gate 			IPCL_DEBUG_LVL(64,
8690Sstevel@tonic-gate 			    ("ipcl_bind_insert: connp %p - udp\n",
8700Sstevel@tonic-gate 			    (void *)connp));
8710Sstevel@tonic-gate 			connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)];
8720Sstevel@tonic-gate 		} else {
8730Sstevel@tonic-gate 			IPCL_DEBUG_LVL(64,
8740Sstevel@tonic-gate 			    ("ipcl_bind_insert: connp %p - protocol\n",
8750Sstevel@tonic-gate 			    (void *)connp));
8760Sstevel@tonic-gate 			connfp = &ipcl_proto_fanout[protocol];
8770Sstevel@tonic-gate 		}
8780Sstevel@tonic-gate 
8790Sstevel@tonic-gate 		if (connp->conn_rem != INADDR_ANY) {
8800Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
8810Sstevel@tonic-gate 		} else if (connp->conn_src != INADDR_ANY) {
8820Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
8830Sstevel@tonic-gate 		} else {
8840Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
8850Sstevel@tonic-gate 		}
8860Sstevel@tonic-gate 		break;
8870Sstevel@tonic-gate 
8880Sstevel@tonic-gate 	case IPPROTO_TCP:
8890Sstevel@tonic-gate 
8900Sstevel@tonic-gate 		/* Insert it in the Bind Hash */
8910Sstevel@tonic-gate 		connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)];
8920Sstevel@tonic-gate 		if (connp->conn_src != INADDR_ANY) {
8930Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
8940Sstevel@tonic-gate 		} else {
8950Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
8960Sstevel@tonic-gate 		}
8970Sstevel@tonic-gate 		if (cl_inet_listen != NULL) {
8980Sstevel@tonic-gate 			ASSERT(!connp->conn_pkt_isv6);
8990Sstevel@tonic-gate 			connp->conn_flags |= IPCL_CL_LISTENER;
9000Sstevel@tonic-gate 			(*cl_inet_listen)(IPPROTO_TCP, AF_INET,
9010Sstevel@tonic-gate 			    (uint8_t *)&connp->conn_bound_source, lport);
9020Sstevel@tonic-gate 		}
9030Sstevel@tonic-gate 		break;
9040Sstevel@tonic-gate 
9050Sstevel@tonic-gate 	case IPPROTO_SCTP:
9060Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
9070Sstevel@tonic-gate 		break;
9080Sstevel@tonic-gate 	}
9090Sstevel@tonic-gate 
9100Sstevel@tonic-gate 	return (ret);
9110Sstevel@tonic-gate }
9120Sstevel@tonic-gate 
9130Sstevel@tonic-gate int
9140Sstevel@tonic-gate ipcl_bind_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src,
9150Sstevel@tonic-gate     uint16_t lport)
9160Sstevel@tonic-gate {
9170Sstevel@tonic-gate 	connf_t	*connfp;
9180Sstevel@tonic-gate 	int	ret = 0;
9190Sstevel@tonic-gate 
9200Sstevel@tonic-gate 	ASSERT(connp);
9210Sstevel@tonic-gate 
9220Sstevel@tonic-gate 	connp->conn_ulp = protocol;
9230Sstevel@tonic-gate 	connp->conn_srcv6 = *src;
9240Sstevel@tonic-gate 	connp->conn_lport = lport;
9250Sstevel@tonic-gate 
9260Sstevel@tonic-gate 	switch (protocol) {
9270Sstevel@tonic-gate 	case IPPROTO_UDP:
9280Sstevel@tonic-gate 	default:
9290Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
9300Sstevel@tonic-gate 			IPCL_DEBUG_LVL(128,
9310Sstevel@tonic-gate 			    ("ipcl_bind_insert_v6: connp %p - udp\n",
9320Sstevel@tonic-gate 			    (void *)connp));
9330Sstevel@tonic-gate 			connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)];
9340Sstevel@tonic-gate 		} else {
9350Sstevel@tonic-gate 			IPCL_DEBUG_LVL(128,
9360Sstevel@tonic-gate 			    ("ipcl_bind_insert_v6: connp %p - protocol\n",
9370Sstevel@tonic-gate 			    (void *)connp));
9380Sstevel@tonic-gate 			connfp = &ipcl_proto_fanout_v6[protocol];
9390Sstevel@tonic-gate 		}
9400Sstevel@tonic-gate 
9410Sstevel@tonic-gate 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) {
9420Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
9430Sstevel@tonic-gate 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
9440Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
9450Sstevel@tonic-gate 		} else {
9460Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
9470Sstevel@tonic-gate 		}
9480Sstevel@tonic-gate 		break;
9490Sstevel@tonic-gate 
9500Sstevel@tonic-gate 	case IPPROTO_TCP:
9510Sstevel@tonic-gate 		/* XXX - Need a separate table for IN6_IS_ADDR_UNSPECIFIED? */
9520Sstevel@tonic-gate 
9530Sstevel@tonic-gate 		/* Insert it in the Bind Hash */
9540Sstevel@tonic-gate 		connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)];
9550Sstevel@tonic-gate 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
9560Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
9570Sstevel@tonic-gate 		} else {
9580Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
9590Sstevel@tonic-gate 		}
9600Sstevel@tonic-gate 		if (cl_inet_listen != NULL) {
9610Sstevel@tonic-gate 			sa_family_t	addr_family;
9620Sstevel@tonic-gate 			uint8_t		*laddrp;
9630Sstevel@tonic-gate 
9640Sstevel@tonic-gate 			if (connp->conn_pkt_isv6) {
9650Sstevel@tonic-gate 				addr_family = AF_INET6;
9660Sstevel@tonic-gate 				laddrp =
9670Sstevel@tonic-gate 				    (uint8_t *)&connp->conn_bound_source_v6;
9680Sstevel@tonic-gate 			} else {
9690Sstevel@tonic-gate 				addr_family = AF_INET;
9700Sstevel@tonic-gate 				laddrp = (uint8_t *)&connp->conn_bound_source;
9710Sstevel@tonic-gate 			}
9720Sstevel@tonic-gate 			connp->conn_flags |= IPCL_CL_LISTENER;
9730Sstevel@tonic-gate 			(*cl_inet_listen)(IPPROTO_TCP, addr_family, laddrp,
9740Sstevel@tonic-gate 			    lport);
9750Sstevel@tonic-gate 		}
9760Sstevel@tonic-gate 		break;
9770Sstevel@tonic-gate 
9780Sstevel@tonic-gate 	case IPPROTO_SCTP:
9790Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
9800Sstevel@tonic-gate 		break;
9810Sstevel@tonic-gate 	}
9820Sstevel@tonic-gate 
9830Sstevel@tonic-gate 	return (ret);
9840Sstevel@tonic-gate }
9850Sstevel@tonic-gate 
9860Sstevel@tonic-gate /*
9870Sstevel@tonic-gate  * ipcl_conn_hash insertion routines.
9880Sstevel@tonic-gate  */
9890Sstevel@tonic-gate int
9900Sstevel@tonic-gate ipcl_conn_insert(conn_t *connp, uint8_t protocol, ipaddr_t src,
9910Sstevel@tonic-gate     ipaddr_t rem, uint32_t ports)
9920Sstevel@tonic-gate {
9930Sstevel@tonic-gate 	connf_t		*connfp;
9940Sstevel@tonic-gate 	uint16_t	*up;
9950Sstevel@tonic-gate 	conn_t		*tconnp;
9960Sstevel@tonic-gate #ifdef	IPCL_DEBUG
9970Sstevel@tonic-gate 	char	sbuf[INET_NTOA_BUFSIZE], rbuf[INET_NTOA_BUFSIZE];
9980Sstevel@tonic-gate #endif
9990Sstevel@tonic-gate 	in_port_t	lport;
10000Sstevel@tonic-gate 	int		ret = 0;
10010Sstevel@tonic-gate 
10020Sstevel@tonic-gate 	IPCL_DEBUG_LVL(256, ("ipcl_conn_insert: connp %p, src = %s, "
10030Sstevel@tonic-gate 	    "dst = %s, ports = %x, protocol = %x", (void *)connp,
10040Sstevel@tonic-gate 	    inet_ntoa_r(src, sbuf), inet_ntoa_r(rem, rbuf),
10050Sstevel@tonic-gate 	    ports, protocol));
10060Sstevel@tonic-gate 
10070Sstevel@tonic-gate 	switch (protocol) {
10080Sstevel@tonic-gate 	case IPPROTO_TCP:
10090Sstevel@tonic-gate 		if (!(connp->conn_flags & IPCL_EAGER)) {
10100Sstevel@tonic-gate 			/*
10110Sstevel@tonic-gate 			 * for a eager connection, i.e connections which
10120Sstevel@tonic-gate 			 * have just been created, the initialization is
10130Sstevel@tonic-gate 			 * already done in ip at conn_creation time, so
10140Sstevel@tonic-gate 			 * we can skip the checks here.
10150Sstevel@tonic-gate 			 */
10160Sstevel@tonic-gate 			IPCL_CONN_INIT(connp, protocol, src, rem, ports);
10170Sstevel@tonic-gate 		}
10180Sstevel@tonic-gate 		connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(connp->conn_rem,
10190Sstevel@tonic-gate 		    connp->conn_ports)];
10200Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
10210Sstevel@tonic-gate 		for (tconnp = connfp->connf_head; tconnp != NULL;
10220Sstevel@tonic-gate 		    tconnp = tconnp->conn_next) {
10230Sstevel@tonic-gate 			if (IPCL_CONN_MATCH(tconnp, connp->conn_ulp,
10240Sstevel@tonic-gate 			    connp->conn_rem, connp->conn_src,
10250Sstevel@tonic-gate 			    connp->conn_ports)) {
10260Sstevel@tonic-gate 
10270Sstevel@tonic-gate 				/* Already have a conn. bail out */
10280Sstevel@tonic-gate 				mutex_exit(&connfp->connf_lock);
10290Sstevel@tonic-gate 				return (EADDRINUSE);
10300Sstevel@tonic-gate 			}
10310Sstevel@tonic-gate 		}
10320Sstevel@tonic-gate 		if (connp->conn_fanout != NULL) {
10330Sstevel@tonic-gate 			/*
10340Sstevel@tonic-gate 			 * Probably a XTI/TLI application trying to do a
10350Sstevel@tonic-gate 			 * rebind. Let it happen.
10360Sstevel@tonic-gate 			 */
10370Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
10380Sstevel@tonic-gate 			IPCL_HASH_REMOVE(connp);
10390Sstevel@tonic-gate 			mutex_enter(&connfp->connf_lock);
10400Sstevel@tonic-gate 		}
10410Sstevel@tonic-gate 		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
10420Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
10430Sstevel@tonic-gate 		break;
10440Sstevel@tonic-gate 
10450Sstevel@tonic-gate 	case IPPROTO_SCTP:
1046*409Skcpoon 		/*
1047*409Skcpoon 		 * The raw socket may have already been bound, remove it
1048*409Skcpoon 		 * from the hash first.
1049*409Skcpoon 		 */
1050*409Skcpoon 		IPCL_HASH_REMOVE(connp);
1051*409Skcpoon 		lport = htons((uint16_t)(ntohl(ports) & 0xFFFF));
10520Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
10530Sstevel@tonic-gate 		break;
10540Sstevel@tonic-gate 
10550Sstevel@tonic-gate 	case IPPROTO_UDP:
10560Sstevel@tonic-gate 	default:
10570Sstevel@tonic-gate 		up = (uint16_t *)&ports;
10580Sstevel@tonic-gate 		IPCL_CONN_INIT(connp, protocol, src, rem, ports);
10590Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
10600Sstevel@tonic-gate 			connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(up[1])];
10610Sstevel@tonic-gate 		} else {
10620Sstevel@tonic-gate 			connfp = &ipcl_proto_fanout[protocol];
10630Sstevel@tonic-gate 		}
10640Sstevel@tonic-gate 
10650Sstevel@tonic-gate 		if (connp->conn_rem != INADDR_ANY) {
10660Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
10670Sstevel@tonic-gate 		} else if (connp->conn_src != INADDR_ANY) {
10680Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
10690Sstevel@tonic-gate 		} else {
10700Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
10710Sstevel@tonic-gate 		}
10720Sstevel@tonic-gate 		break;
10730Sstevel@tonic-gate 	}
10740Sstevel@tonic-gate 
10750Sstevel@tonic-gate 	return (ret);
10760Sstevel@tonic-gate }
10770Sstevel@tonic-gate 
10780Sstevel@tonic-gate int
10790Sstevel@tonic-gate ipcl_conn_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src,
10800Sstevel@tonic-gate     const in6_addr_t *rem, uint32_t ports, uint_t ifindex)
10810Sstevel@tonic-gate {
10820Sstevel@tonic-gate 	connf_t		*connfp;
10830Sstevel@tonic-gate 	uint16_t	*up;
10840Sstevel@tonic-gate 	conn_t		*tconnp;
10850Sstevel@tonic-gate 	in_port_t	lport;
10860Sstevel@tonic-gate 	int		ret = 0;
10870Sstevel@tonic-gate 
10880Sstevel@tonic-gate 	switch (protocol) {
10890Sstevel@tonic-gate 	case IPPROTO_TCP:
10900Sstevel@tonic-gate 		/* Just need to insert a conn struct */
10910Sstevel@tonic-gate 		if (!(connp->conn_flags & IPCL_EAGER)) {
10920Sstevel@tonic-gate 			IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports);
10930Sstevel@tonic-gate 		}
10940Sstevel@tonic-gate 		connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(connp->conn_remv6,
10950Sstevel@tonic-gate 		    connp->conn_ports)];
10960Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
10970Sstevel@tonic-gate 		for (tconnp = connfp->connf_head; tconnp != NULL;
10980Sstevel@tonic-gate 		    tconnp = tconnp->conn_next) {
10990Sstevel@tonic-gate 			if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_ulp,
11000Sstevel@tonic-gate 			    connp->conn_remv6, connp->conn_srcv6,
11010Sstevel@tonic-gate 			    connp->conn_ports) &&
11020Sstevel@tonic-gate 			    (tconnp->conn_tcp->tcp_bound_if == 0 ||
11030Sstevel@tonic-gate 			    tconnp->conn_tcp->tcp_bound_if == ifindex)) {
11040Sstevel@tonic-gate 				/* Already have a conn. bail out */
11050Sstevel@tonic-gate 				mutex_exit(&connfp->connf_lock);
11060Sstevel@tonic-gate 				return (EADDRINUSE);
11070Sstevel@tonic-gate 			}
11080Sstevel@tonic-gate 		}
11090Sstevel@tonic-gate 		if (connp->conn_fanout != NULL) {
11100Sstevel@tonic-gate 			/*
11110Sstevel@tonic-gate 			 * Probably a XTI/TLI application trying to do a
11120Sstevel@tonic-gate 			 * rebind. Let it happen.
11130Sstevel@tonic-gate 			 */
11140Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
11150Sstevel@tonic-gate 			IPCL_HASH_REMOVE(connp);
11160Sstevel@tonic-gate 			mutex_enter(&connfp->connf_lock);
11170Sstevel@tonic-gate 		}
11180Sstevel@tonic-gate 		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
11190Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
11200Sstevel@tonic-gate 		break;
11210Sstevel@tonic-gate 
11220Sstevel@tonic-gate 	case IPPROTO_SCTP:
1123*409Skcpoon 		IPCL_HASH_REMOVE(connp);
1124*409Skcpoon 		lport = htons((uint16_t)(ntohl(ports) & 0xFFFF));
11250Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
11260Sstevel@tonic-gate 		break;
11270Sstevel@tonic-gate 
11280Sstevel@tonic-gate 	case IPPROTO_UDP:
11290Sstevel@tonic-gate 	default:
11300Sstevel@tonic-gate 		up = (uint16_t *)&ports;
11310Sstevel@tonic-gate 		IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports);
11320Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
11330Sstevel@tonic-gate 			connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(up[1])];
11340Sstevel@tonic-gate 		} else {
11350Sstevel@tonic-gate 			connfp = &ipcl_proto_fanout_v6[protocol];
11360Sstevel@tonic-gate 		}
11370Sstevel@tonic-gate 
11380Sstevel@tonic-gate 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) {
11390Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
11400Sstevel@tonic-gate 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
11410Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
11420Sstevel@tonic-gate 		} else {
11430Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
11440Sstevel@tonic-gate 		}
11450Sstevel@tonic-gate 		break;
11460Sstevel@tonic-gate 	}
11470Sstevel@tonic-gate 
11480Sstevel@tonic-gate 	return (ret);
11490Sstevel@tonic-gate }
11500Sstevel@tonic-gate 
11510Sstevel@tonic-gate /*
11520Sstevel@tonic-gate  * v4 packet classifying function. looks up the fanout table to
11530Sstevel@tonic-gate  * find the conn, the packet belongs to. returns the conn with
11540Sstevel@tonic-gate  * the reference held, null otherwise.
11550Sstevel@tonic-gate  */
11560Sstevel@tonic-gate conn_t *
11570Sstevel@tonic-gate ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid)
11580Sstevel@tonic-gate {
11590Sstevel@tonic-gate 	ipha_t	*ipha;
11600Sstevel@tonic-gate 	connf_t	*connfp, *bind_connfp;
11610Sstevel@tonic-gate 	uint16_t lport;
11620Sstevel@tonic-gate 	uint16_t fport;
11630Sstevel@tonic-gate 	uint32_t ports;
11640Sstevel@tonic-gate 	conn_t	*connp;
11650Sstevel@tonic-gate 	uint16_t  *up;
11660Sstevel@tonic-gate 
11670Sstevel@tonic-gate 	ipha = (ipha_t *)mp->b_rptr;
11680Sstevel@tonic-gate 	up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET);
11690Sstevel@tonic-gate 
11700Sstevel@tonic-gate 	switch (protocol) {
11710Sstevel@tonic-gate 	case IPPROTO_TCP:
11720Sstevel@tonic-gate 		ports = *(uint32_t *)up;
11730Sstevel@tonic-gate 		connfp =
11740Sstevel@tonic-gate 		    &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src, ports)];
11750Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
11760Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
11770Sstevel@tonic-gate 		    connp = connp->conn_next) {
11780Sstevel@tonic-gate 			if (IPCL_CONN_MATCH(connp, protocol,
11790Sstevel@tonic-gate 			    ipha->ipha_src, ipha->ipha_dst, ports))
11800Sstevel@tonic-gate 				break;
11810Sstevel@tonic-gate 		}
11820Sstevel@tonic-gate 
11830Sstevel@tonic-gate 		if (connp != NULL) {
11840Sstevel@tonic-gate 			CONN_INC_REF(connp);
11850Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
11860Sstevel@tonic-gate 			return (connp);
11870Sstevel@tonic-gate 		}
11880Sstevel@tonic-gate 
11890Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
11900Sstevel@tonic-gate 
11910Sstevel@tonic-gate 		lport = up[1];
11920Sstevel@tonic-gate 		bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)];
11930Sstevel@tonic-gate 		mutex_enter(&bind_connfp->connf_lock);
11940Sstevel@tonic-gate 		for (connp = bind_connfp->connf_head; connp != NULL;
11950Sstevel@tonic-gate 		    connp = connp->conn_next) {
11960Sstevel@tonic-gate 			if (IPCL_BIND_MATCH(connp, protocol,
11970Sstevel@tonic-gate 			    ipha->ipha_dst, lport) &&
11980Sstevel@tonic-gate 			    connp->conn_zoneid == zoneid)
11990Sstevel@tonic-gate 				break;
12000Sstevel@tonic-gate 		}
12010Sstevel@tonic-gate 
12020Sstevel@tonic-gate 		if (connp != NULL) {
12030Sstevel@tonic-gate 			/* Have a listner at least */
12040Sstevel@tonic-gate 			CONN_INC_REF(connp);
12050Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
12060Sstevel@tonic-gate 			return (connp);
12070Sstevel@tonic-gate 		}
12080Sstevel@tonic-gate 
12090Sstevel@tonic-gate 		mutex_exit(&bind_connfp->connf_lock);
12100Sstevel@tonic-gate 
12110Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512,
12120Sstevel@tonic-gate 		    ("ipcl_classify: couldn't classify mp = %p\n",
12130Sstevel@tonic-gate 		    (void *)mp));
12140Sstevel@tonic-gate 		break;
12150Sstevel@tonic-gate 
12160Sstevel@tonic-gate 	case IPPROTO_UDP:
12170Sstevel@tonic-gate 		lport = up[1];
12180Sstevel@tonic-gate 		fport = up[0];
12190Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512, ("ipcl_udp_classify %x %x", lport, fport));
12200Sstevel@tonic-gate 		connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)];
12210Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
12220Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
12230Sstevel@tonic-gate 		    connp = connp->conn_next) {
12240Sstevel@tonic-gate 			if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
12250Sstevel@tonic-gate 			    fport, ipha->ipha_src) &&
12260Sstevel@tonic-gate 			    connp->conn_zoneid == zoneid)
12270Sstevel@tonic-gate 				break;
12280Sstevel@tonic-gate 		}
12290Sstevel@tonic-gate 
12300Sstevel@tonic-gate 		if (connp != NULL) {
12310Sstevel@tonic-gate 			CONN_INC_REF(connp);
12320Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
12330Sstevel@tonic-gate 			return (connp);
12340Sstevel@tonic-gate 		}
12350Sstevel@tonic-gate 
12360Sstevel@tonic-gate 		/*
12370Sstevel@tonic-gate 		 * We shouldn't come here for multicast/broadcast packets
12380Sstevel@tonic-gate 		 */
12390Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
12400Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512,
12410Sstevel@tonic-gate 		    ("ipcl_classify: cant find udp conn_t for ports : %x %x",
12420Sstevel@tonic-gate 		    lport, fport));
12430Sstevel@tonic-gate 		break;
12440Sstevel@tonic-gate 	}
12450Sstevel@tonic-gate 
12460Sstevel@tonic-gate 	return (NULL);
12470Sstevel@tonic-gate }
12480Sstevel@tonic-gate 
12490Sstevel@tonic-gate conn_t *
12500Sstevel@tonic-gate ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid)
12510Sstevel@tonic-gate {
12520Sstevel@tonic-gate 	ip6_t		*ip6h;
12530Sstevel@tonic-gate 	connf_t		*connfp, *bind_connfp;
12540Sstevel@tonic-gate 	uint16_t	lport;
12550Sstevel@tonic-gate 	uint16_t	fport;
12560Sstevel@tonic-gate 	tcph_t		*tcph;
12570Sstevel@tonic-gate 	uint32_t	ports;
12580Sstevel@tonic-gate 	conn_t		*connp;
12590Sstevel@tonic-gate 	uint16_t	*up;
12600Sstevel@tonic-gate 
12610Sstevel@tonic-gate 
12620Sstevel@tonic-gate 	ip6h = (ip6_t *)mp->b_rptr;
12630Sstevel@tonic-gate 
12640Sstevel@tonic-gate 	switch (protocol) {
12650Sstevel@tonic-gate 	case IPPROTO_TCP:
12660Sstevel@tonic-gate 		tcph = (tcph_t *)&mp->b_rptr[hdr_len];
12670Sstevel@tonic-gate 		up = (uint16_t *)tcph->th_lport;
12680Sstevel@tonic-gate 		ports = *(uint32_t *)up;
12690Sstevel@tonic-gate 
12700Sstevel@tonic-gate 		connfp =
12710Sstevel@tonic-gate 		    &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src, ports)];
12720Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
12730Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
12740Sstevel@tonic-gate 		    connp = connp->conn_next) {
12750Sstevel@tonic-gate 			if (IPCL_CONN_MATCH_V6(connp, protocol,
12760Sstevel@tonic-gate 			    ip6h->ip6_src, ip6h->ip6_dst, ports))
12770Sstevel@tonic-gate 				break;
12780Sstevel@tonic-gate 		}
12790Sstevel@tonic-gate 
12800Sstevel@tonic-gate 		if (connp != NULL) {
12810Sstevel@tonic-gate 			CONN_INC_REF(connp);
12820Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
12830Sstevel@tonic-gate 			return (connp);
12840Sstevel@tonic-gate 		}
12850Sstevel@tonic-gate 
12860Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
12870Sstevel@tonic-gate 
12880Sstevel@tonic-gate 		lport = up[1];
12890Sstevel@tonic-gate 		bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)];
12900Sstevel@tonic-gate 		mutex_enter(&bind_connfp->connf_lock);
12910Sstevel@tonic-gate 		for (connp = bind_connfp->connf_head; connp != NULL;
12920Sstevel@tonic-gate 		    connp = connp->conn_next) {
12930Sstevel@tonic-gate 			if (IPCL_BIND_MATCH_V6(connp, protocol,
12940Sstevel@tonic-gate 			    ip6h->ip6_dst, lport) &&
12950Sstevel@tonic-gate 			    connp->conn_zoneid == zoneid)
12960Sstevel@tonic-gate 				break;
12970Sstevel@tonic-gate 		}
12980Sstevel@tonic-gate 
12990Sstevel@tonic-gate 		if (connp != NULL) {
13000Sstevel@tonic-gate 			/* Have a listner at least */
13010Sstevel@tonic-gate 			CONN_INC_REF(connp);
13020Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
13030Sstevel@tonic-gate 			IPCL_DEBUG_LVL(512,
13040Sstevel@tonic-gate 			    ("ipcl_classify_v6: found listner "
13050Sstevel@tonic-gate 			    "connp = %p\n", (void *)connp));
13060Sstevel@tonic-gate 
13070Sstevel@tonic-gate 			return (connp);
13080Sstevel@tonic-gate 		}
13090Sstevel@tonic-gate 
13100Sstevel@tonic-gate 		mutex_exit(&bind_connfp->connf_lock);
13110Sstevel@tonic-gate 
13120Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512,
13130Sstevel@tonic-gate 		    ("ipcl_classify_v6: couldn't classify mp = %p\n",
13140Sstevel@tonic-gate 		    (void *)mp));
13150Sstevel@tonic-gate 		break;
13160Sstevel@tonic-gate 
13170Sstevel@tonic-gate 	case IPPROTO_UDP:
13180Sstevel@tonic-gate 		up = (uint16_t *)&mp->b_rptr[hdr_len];
13190Sstevel@tonic-gate 		lport = up[1];
13200Sstevel@tonic-gate 		fport = up[0];
13210Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512, ("ipcl_udp_classify_v6 %x %x", lport,
13220Sstevel@tonic-gate 		    fport));
13230Sstevel@tonic-gate 		connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)];
13240Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
13250Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
13260Sstevel@tonic-gate 		    connp = connp->conn_next) {
13270Sstevel@tonic-gate 			if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst,
13280Sstevel@tonic-gate 			    fport, ip6h->ip6_src) &&
13290Sstevel@tonic-gate 			    connp->conn_zoneid == zoneid)
13300Sstevel@tonic-gate 				break;
13310Sstevel@tonic-gate 		}
13320Sstevel@tonic-gate 
13330Sstevel@tonic-gate 		if (connp != NULL) {
13340Sstevel@tonic-gate 			CONN_INC_REF(connp);
13350Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
13360Sstevel@tonic-gate 			return (connp);
13370Sstevel@tonic-gate 		}
13380Sstevel@tonic-gate 
13390Sstevel@tonic-gate 		/*
13400Sstevel@tonic-gate 		 * We shouldn't come here for multicast/broadcast packets
13410Sstevel@tonic-gate 		 */
13420Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
13430Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512,
13440Sstevel@tonic-gate 		    ("ipcl_classify_v6: cant find udp conn_t for ports : %x %x",
13450Sstevel@tonic-gate 		    lport, fport));
13460Sstevel@tonic-gate 		break;
13470Sstevel@tonic-gate 	}
13480Sstevel@tonic-gate 
13490Sstevel@tonic-gate 
13500Sstevel@tonic-gate 	return (NULL);
13510Sstevel@tonic-gate }
13520Sstevel@tonic-gate 
13530Sstevel@tonic-gate /*
13540Sstevel@tonic-gate  * wrapper around ipcl_classify_(v4,v6) routines.
13550Sstevel@tonic-gate  */
13560Sstevel@tonic-gate conn_t *
13570Sstevel@tonic-gate ipcl_classify(mblk_t *mp, zoneid_t zoneid)
13580Sstevel@tonic-gate {
13590Sstevel@tonic-gate 	uint16_t	hdr_len;
13600Sstevel@tonic-gate 	ipha_t		*ipha;
13610Sstevel@tonic-gate 	uint8_t		*nexthdrp;
13620Sstevel@tonic-gate 
13630Sstevel@tonic-gate 	if (MBLKL(mp) < sizeof (ipha_t))
13640Sstevel@tonic-gate 		return (NULL);
13650Sstevel@tonic-gate 
13660Sstevel@tonic-gate 	switch (IPH_HDR_VERSION(mp->b_rptr)) {
13670Sstevel@tonic-gate 	case IPV4_VERSION:
13680Sstevel@tonic-gate 		ipha = (ipha_t *)mp->b_rptr;
13690Sstevel@tonic-gate 		hdr_len = IPH_HDR_LENGTH(ipha);
13700Sstevel@tonic-gate 		return (ipcl_classify_v4(mp, ipha->ipha_protocol, hdr_len,
13710Sstevel@tonic-gate 		    zoneid));
13720Sstevel@tonic-gate 	case IPV6_VERSION:
13730Sstevel@tonic-gate 		if (!ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr,
13740Sstevel@tonic-gate 		    &hdr_len, &nexthdrp))
13750Sstevel@tonic-gate 			return (NULL);
13760Sstevel@tonic-gate 
13770Sstevel@tonic-gate 		return (ipcl_classify_v6(mp, *nexthdrp, hdr_len, zoneid));
13780Sstevel@tonic-gate 	}
13790Sstevel@tonic-gate 
13800Sstevel@tonic-gate 	return (NULL);
13810Sstevel@tonic-gate }
13820Sstevel@tonic-gate 
13830Sstevel@tonic-gate conn_t *
13840Sstevel@tonic-gate ipcl_classify_raw(uint8_t protocol, zoneid_t zoneid, uint32_t ports,
13850Sstevel@tonic-gate     ipha_t *hdr)
13860Sstevel@tonic-gate {
13870Sstevel@tonic-gate 	struct connf_s	*connfp;
13880Sstevel@tonic-gate 	conn_t		*connp;
13890Sstevel@tonic-gate 	in_port_t	lport;
13900Sstevel@tonic-gate 	int		af;
13910Sstevel@tonic-gate 
13920Sstevel@tonic-gate 	lport = ((uint16_t *)&ports)[1];
13930Sstevel@tonic-gate 	af = IPH_HDR_VERSION(hdr);
13940Sstevel@tonic-gate 	connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport))];
13950Sstevel@tonic-gate 
13960Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
13970Sstevel@tonic-gate 	for (connp = connfp->connf_head; connp != NULL;
13980Sstevel@tonic-gate 	    connp = connp->conn_next) {
13990Sstevel@tonic-gate 		/* We don't allow v4 fallback for v6 raw socket. */
14000Sstevel@tonic-gate 		if ((af == (connp->conn_af_isv6 ? IPV4_VERSION :
14010Sstevel@tonic-gate 		    IPV6_VERSION)) || (connp->conn_zoneid != zoneid)) {
14020Sstevel@tonic-gate 			continue;
14030Sstevel@tonic-gate 		}
14040Sstevel@tonic-gate 		if (connp->conn_fully_bound) {
14050Sstevel@tonic-gate 			if (af == IPV4_VERSION) {
14060Sstevel@tonic-gate 				if (IPCL_CONN_MATCH(connp, protocol,
14070Sstevel@tonic-gate 				    hdr->ipha_src, hdr->ipha_dst, ports)) {
14080Sstevel@tonic-gate 					break;
14090Sstevel@tonic-gate 				}
14100Sstevel@tonic-gate 			} else {
14110Sstevel@tonic-gate 				if (IPCL_CONN_MATCH_V6(connp, protocol,
14120Sstevel@tonic-gate 				    ((ip6_t *)hdr)->ip6_src,
14130Sstevel@tonic-gate 				    ((ip6_t *)hdr)->ip6_dst, ports)) {
14140Sstevel@tonic-gate 					break;
14150Sstevel@tonic-gate 				}
14160Sstevel@tonic-gate 			}
14170Sstevel@tonic-gate 		} else {
14180Sstevel@tonic-gate 			if (af == IPV4_VERSION) {
14190Sstevel@tonic-gate 				if (IPCL_BIND_MATCH(connp, protocol,
14200Sstevel@tonic-gate 				    hdr->ipha_dst, lport)) {
14210Sstevel@tonic-gate 					break;
14220Sstevel@tonic-gate 				}
14230Sstevel@tonic-gate 			} else {
14240Sstevel@tonic-gate 				if (IPCL_BIND_MATCH_V6(connp, protocol,
14250Sstevel@tonic-gate 				    ((ip6_t *)hdr)->ip6_dst, lport)) {
14260Sstevel@tonic-gate 					break;
14270Sstevel@tonic-gate 				}
14280Sstevel@tonic-gate 			}
14290Sstevel@tonic-gate 		}
14300Sstevel@tonic-gate 	}
1431*409Skcpoon 
1432*409Skcpoon 	if (connp != NULL)
1433*409Skcpoon 		goto found;
1434*409Skcpoon 	mutex_exit(&connfp->connf_lock);
1435*409Skcpoon 
1436*409Skcpoon 	/* Try to look for a wildcard match. */
1437*409Skcpoon 	connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(0)];
1438*409Skcpoon 	mutex_enter(&connfp->connf_lock);
1439*409Skcpoon 	for (connp = connfp->connf_head; connp != NULL;
1440*409Skcpoon 	    connp = connp->conn_next) {
1441*409Skcpoon 		/* We don't allow v4 fallback for v6 raw socket. */
1442*409Skcpoon 		if ((af == (connp->conn_af_isv6 ? IPV4_VERSION :
1443*409Skcpoon 		    IPV6_VERSION)) || (connp->conn_zoneid != zoneid)) {
1444*409Skcpoon 			continue;
1445*409Skcpoon 		}
1446*409Skcpoon 		if (af == IPV4_VERSION) {
1447*409Skcpoon 			if (IPCL_RAW_MATCH(connp, protocol, hdr->ipha_dst))
1448*409Skcpoon 				break;
1449*409Skcpoon 		} else {
1450*409Skcpoon 			if (IPCL_RAW_MATCH_V6(connp, protocol,
1451*409Skcpoon 			    ((ip6_t *)hdr)->ip6_dst)) {
1452*409Skcpoon 				break;
1453*409Skcpoon 			}
1454*409Skcpoon 		}
14550Sstevel@tonic-gate 	}
1456*409Skcpoon 
1457*409Skcpoon 	if (connp != NULL)
1458*409Skcpoon 		goto found;
1459*409Skcpoon 
14600Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
14610Sstevel@tonic-gate 	return (NULL);
1462*409Skcpoon 
1463*409Skcpoon found:
1464*409Skcpoon 	ASSERT(connp != NULL);
1465*409Skcpoon 	CONN_INC_REF(connp);
1466*409Skcpoon 	mutex_exit(&connfp->connf_lock);
1467*409Skcpoon 	return (connp);
14680Sstevel@tonic-gate }
14690Sstevel@tonic-gate 
14700Sstevel@tonic-gate /* ARGSUSED */
14710Sstevel@tonic-gate static int
14720Sstevel@tonic-gate ipcl_tcpconn_constructor(void *buf, void *cdrarg, int kmflags)
14730Sstevel@tonic-gate {
14740Sstevel@tonic-gate 	itc_t	*itc = (itc_t *)buf;
14750Sstevel@tonic-gate 	conn_t 	*connp = &itc->itc_conn;
14760Sstevel@tonic-gate 	tcp_t	*tcp = &itc->itc_tcp;
14770Sstevel@tonic-gate 	bzero(itc, sizeof (itc_t));
14780Sstevel@tonic-gate 	tcp->tcp_timercache = tcp_timermp_alloc(KM_NOSLEEP);
14790Sstevel@tonic-gate 	connp->conn_tcp = tcp;
14800Sstevel@tonic-gate 	connp->conn_flags = IPCL_TCPCONN;
14810Sstevel@tonic-gate 	connp->conn_ulp = IPPROTO_TCP;
14820Sstevel@tonic-gate 	tcp->tcp_connp = connp;
14830Sstevel@tonic-gate 	return (0);
14840Sstevel@tonic-gate }
14850Sstevel@tonic-gate 
14860Sstevel@tonic-gate /* ARGSUSED */
14870Sstevel@tonic-gate static void
14880Sstevel@tonic-gate ipcl_tcpconn_destructor(void *buf, void *cdrarg)
14890Sstevel@tonic-gate {
14900Sstevel@tonic-gate 	tcp_timermp_free(((conn_t *)buf)->conn_tcp);
14910Sstevel@tonic-gate }
14920Sstevel@tonic-gate 
14930Sstevel@tonic-gate /*
14940Sstevel@tonic-gate  * All conns are inserted in a global multi-list for the benefit of
14950Sstevel@tonic-gate  * walkers. The walk is guaranteed to walk all open conns at the time
14960Sstevel@tonic-gate  * of the start of the walk exactly once. This property is needed to
14970Sstevel@tonic-gate  * achieve some cleanups during unplumb of interfaces. This is achieved
14980Sstevel@tonic-gate  * as follows.
14990Sstevel@tonic-gate  *
15000Sstevel@tonic-gate  * ipcl_conn_create and ipcl_conn_destroy are the only functions that
15010Sstevel@tonic-gate  * call the insert and delete functions below at creation and deletion
15020Sstevel@tonic-gate  * time respectively. The conn never moves or changes its position in this
15030Sstevel@tonic-gate  * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt
15040Sstevel@tonic-gate  * won't increase due to walkers, once the conn deletion has started. Note
15050Sstevel@tonic-gate  * that we can't remove the conn from the global list and then wait for
15060Sstevel@tonic-gate  * the refcnt to drop to zero, since walkers would then see a truncated
15070Sstevel@tonic-gate  * list. CONN_INCIPIENT ensures that walkers don't start looking at
15080Sstevel@tonic-gate  * conns until ip_open is ready to make them globally visible.
15090Sstevel@tonic-gate  * The global round robin multi-list locks are held only to get the
15100Sstevel@tonic-gate  * next member/insertion/deletion and contention should be negligible
15110Sstevel@tonic-gate  * if the multi-list is much greater than the number of cpus.
15120Sstevel@tonic-gate  */
15130Sstevel@tonic-gate void
15140Sstevel@tonic-gate ipcl_globalhash_insert(conn_t *connp)
15150Sstevel@tonic-gate {
15160Sstevel@tonic-gate 	int	index;
15170Sstevel@tonic-gate 
15180Sstevel@tonic-gate 	/*
15190Sstevel@tonic-gate 	 * No need for atomic here. Approximate even distribution
15200Sstevel@tonic-gate 	 * in the global lists is sufficient.
15210Sstevel@tonic-gate 	 */
15220Sstevel@tonic-gate 	conn_g_index++;
15230Sstevel@tonic-gate 	index = conn_g_index & (CONN_G_HASH_SIZE - 1);
15240Sstevel@tonic-gate 
15250Sstevel@tonic-gate 	connp->conn_g_prev = NULL;
15260Sstevel@tonic-gate 	/*
15270Sstevel@tonic-gate 	 * Mark as INCIPIENT, so that walkers will ignore this
15280Sstevel@tonic-gate 	 * for now, till ip_open is ready to make it visible globally.
15290Sstevel@tonic-gate 	 */
15300Sstevel@tonic-gate 	connp->conn_state_flags |= CONN_INCIPIENT;
15310Sstevel@tonic-gate 
15320Sstevel@tonic-gate 	/* Insert at the head of the list */
15330Sstevel@tonic-gate 	mutex_enter(&ipcl_globalhash_fanout[index].connf_lock);
15340Sstevel@tonic-gate 	connp->conn_g_next = ipcl_globalhash_fanout[index].connf_head;
15350Sstevel@tonic-gate 	if (connp->conn_g_next != NULL)
15360Sstevel@tonic-gate 		connp->conn_g_next->conn_g_prev = connp;
15370Sstevel@tonic-gate 	ipcl_globalhash_fanout[index].connf_head = connp;
15380Sstevel@tonic-gate 
15390Sstevel@tonic-gate 	/* The fanout bucket this conn points to */
15400Sstevel@tonic-gate 	connp->conn_g_fanout = &ipcl_globalhash_fanout[index];
15410Sstevel@tonic-gate 
15420Sstevel@tonic-gate 	mutex_exit(&ipcl_globalhash_fanout[index].connf_lock);
15430Sstevel@tonic-gate }
15440Sstevel@tonic-gate 
15450Sstevel@tonic-gate void
15460Sstevel@tonic-gate ipcl_globalhash_remove(conn_t *connp)
15470Sstevel@tonic-gate {
15480Sstevel@tonic-gate 	/*
15490Sstevel@tonic-gate 	 * We were never inserted in the global multi list.
15500Sstevel@tonic-gate 	 * IPCL_NONE variety is never inserted in the global multilist
15510Sstevel@tonic-gate 	 * since it is presumed to not need any cleanup and is transient.
15520Sstevel@tonic-gate 	 */
15530Sstevel@tonic-gate 	if (connp->conn_g_fanout == NULL)
15540Sstevel@tonic-gate 		return;
15550Sstevel@tonic-gate 
15560Sstevel@tonic-gate 	mutex_enter(&connp->conn_g_fanout->connf_lock);
15570Sstevel@tonic-gate 	if (connp->conn_g_prev != NULL)
15580Sstevel@tonic-gate 		connp->conn_g_prev->conn_g_next = connp->conn_g_next;
15590Sstevel@tonic-gate 	else
15600Sstevel@tonic-gate 		connp->conn_g_fanout->connf_head = connp->conn_g_next;
15610Sstevel@tonic-gate 	if (connp->conn_g_next != NULL)
15620Sstevel@tonic-gate 		connp->conn_g_next->conn_g_prev = connp->conn_g_prev;
15630Sstevel@tonic-gate 	mutex_exit(&connp->conn_g_fanout->connf_lock);
15640Sstevel@tonic-gate 
15650Sstevel@tonic-gate 	/* Better to stumble on a null pointer than to corrupt memory */
15660Sstevel@tonic-gate 	connp->conn_g_next = NULL;
15670Sstevel@tonic-gate 	connp->conn_g_prev = NULL;
15680Sstevel@tonic-gate }
15690Sstevel@tonic-gate 
15700Sstevel@tonic-gate /*
15710Sstevel@tonic-gate  * Walk the list of all conn_t's in the system, calling the function provided
15720Sstevel@tonic-gate  * with the specified argument for each.
15730Sstevel@tonic-gate  * Applies to both IPv4 and IPv6.
15740Sstevel@tonic-gate  *
15750Sstevel@tonic-gate  * IPCs may hold pointers to ipif/ill. To guard against stale pointers
15760Sstevel@tonic-gate  * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is
15770Sstevel@tonic-gate  * unplumbed or removed. New conn_t's that are created while we are walking
15780Sstevel@tonic-gate  * may be missed by this walk, because they are not necessarily inserted
15790Sstevel@tonic-gate  * at the tail of the list. They are new conn_t's and thus don't have any
15800Sstevel@tonic-gate  * stale pointers. The CONN_CLOSING flag ensures that no new reference
15810Sstevel@tonic-gate  * is created to the struct that is going away.
15820Sstevel@tonic-gate  */
15830Sstevel@tonic-gate void
15840Sstevel@tonic-gate ipcl_walk(pfv_t func, void *arg)
15850Sstevel@tonic-gate {
15860Sstevel@tonic-gate 	int	i;
15870Sstevel@tonic-gate 	conn_t	*connp;
15880Sstevel@tonic-gate 	conn_t	*prev_connp;
15890Sstevel@tonic-gate 
15900Sstevel@tonic-gate 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
15910Sstevel@tonic-gate 		mutex_enter(&ipcl_globalhash_fanout[i].connf_lock);
15920Sstevel@tonic-gate 		prev_connp = NULL;
15930Sstevel@tonic-gate 		connp = ipcl_globalhash_fanout[i].connf_head;
15940Sstevel@tonic-gate 		while (connp != NULL) {
15950Sstevel@tonic-gate 			mutex_enter(&connp->conn_lock);
15960Sstevel@tonic-gate 			if (connp->conn_state_flags &
15970Sstevel@tonic-gate 			    (CONN_CONDEMNED | CONN_INCIPIENT)) {
15980Sstevel@tonic-gate 				mutex_exit(&connp->conn_lock);
15990Sstevel@tonic-gate 				connp = connp->conn_g_next;
16000Sstevel@tonic-gate 				continue;
16010Sstevel@tonic-gate 			}
16020Sstevel@tonic-gate 			CONN_INC_REF_LOCKED(connp);
16030Sstevel@tonic-gate 			mutex_exit(&connp->conn_lock);
16040Sstevel@tonic-gate 			mutex_exit(&ipcl_globalhash_fanout[i].connf_lock);
16050Sstevel@tonic-gate 			(*func)(connp, arg);
16060Sstevel@tonic-gate 			if (prev_connp != NULL)
16070Sstevel@tonic-gate 				CONN_DEC_REF(prev_connp);
16080Sstevel@tonic-gate 			mutex_enter(&ipcl_globalhash_fanout[i].connf_lock);
16090Sstevel@tonic-gate 			prev_connp = connp;
16100Sstevel@tonic-gate 			connp = connp->conn_g_next;
16110Sstevel@tonic-gate 		}
16120Sstevel@tonic-gate 		mutex_exit(&ipcl_globalhash_fanout[i].connf_lock);
16130Sstevel@tonic-gate 		if (prev_connp != NULL)
16140Sstevel@tonic-gate 			CONN_DEC_REF(prev_connp);
16150Sstevel@tonic-gate 	}
16160Sstevel@tonic-gate }
16170Sstevel@tonic-gate 
16180Sstevel@tonic-gate /*
16190Sstevel@tonic-gate  * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on
16200Sstevel@tonic-gate  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
16210Sstevel@tonic-gate  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
16220Sstevel@tonic-gate  * (peer tcp in at least ESTABLISHED state).
16230Sstevel@tonic-gate  */
16240Sstevel@tonic-gate conn_t *
16250Sstevel@tonic-gate ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcph_t *tcph)
16260Sstevel@tonic-gate {
16270Sstevel@tonic-gate 	uint32_t ports;
16280Sstevel@tonic-gate 	uint16_t *pports = (uint16_t *)&ports;
16290Sstevel@tonic-gate 	connf_t	*connfp;
16300Sstevel@tonic-gate 	conn_t	*tconnp;
16310Sstevel@tonic-gate 	boolean_t zone_chk;
16320Sstevel@tonic-gate 
16330Sstevel@tonic-gate 	/*
16340Sstevel@tonic-gate 	 * If either the source of destination address is loopback, then
16350Sstevel@tonic-gate 	 * both endpoints must be in the same Zone.  Otherwise, both of
16360Sstevel@tonic-gate 	 * the addresses are system-wide unique (tcp is in ESTABLISHED
16370Sstevel@tonic-gate 	 * state) and the endpoints may reside in different Zones.
16380Sstevel@tonic-gate 	 */
16390Sstevel@tonic-gate 	zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) ||
16400Sstevel@tonic-gate 	    ipha->ipha_dst == htonl(INADDR_LOOPBACK));
16410Sstevel@tonic-gate 
16420Sstevel@tonic-gate 	bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t));
16430Sstevel@tonic-gate 	bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t));
16440Sstevel@tonic-gate 
16450Sstevel@tonic-gate 	connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, ports)];
16460Sstevel@tonic-gate 
16470Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
16480Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
16490Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
16500Sstevel@tonic-gate 
16510Sstevel@tonic-gate 		if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
16520Sstevel@tonic-gate 		    ipha->ipha_dst, ipha->ipha_src, ports) &&
16530Sstevel@tonic-gate 		    tconnp->conn_tcp->tcp_state >= TCPS_ESTABLISHED &&
16540Sstevel@tonic-gate 		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
16550Sstevel@tonic-gate 
16560Sstevel@tonic-gate 			ASSERT(tconnp != connp);
16570Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
16580Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
16590Sstevel@tonic-gate 			return (tconnp);
16600Sstevel@tonic-gate 		}
16610Sstevel@tonic-gate 	}
16620Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
16630Sstevel@tonic-gate 	return (NULL);
16640Sstevel@tonic-gate }
16650Sstevel@tonic-gate 
16660Sstevel@tonic-gate /*
16670Sstevel@tonic-gate  * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on
16680Sstevel@tonic-gate  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
16690Sstevel@tonic-gate  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
16700Sstevel@tonic-gate  * (peer tcp in at least ESTABLISHED state).
16710Sstevel@tonic-gate  */
16720Sstevel@tonic-gate conn_t *
16730Sstevel@tonic-gate ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcph_t *tcph)
16740Sstevel@tonic-gate {
16750Sstevel@tonic-gate 	uint32_t ports;
16760Sstevel@tonic-gate 	uint16_t *pports = (uint16_t *)&ports;
16770Sstevel@tonic-gate 	connf_t	*connfp;
16780Sstevel@tonic-gate 	conn_t	*tconnp;
16790Sstevel@tonic-gate 	boolean_t zone_chk;
16800Sstevel@tonic-gate 
16810Sstevel@tonic-gate 	/*
16820Sstevel@tonic-gate 	 * If either the source of destination address is loopback, then
16830Sstevel@tonic-gate 	 * both endpoints must be in the same Zone.  Otherwise, both of
16840Sstevel@tonic-gate 	 * the addresses are system-wide unique (tcp is in ESTABLISHED
16850Sstevel@tonic-gate 	 * state) and the endpoints may reside in different Zones.  We
16860Sstevel@tonic-gate 	 * don't do Zone check for link local address(es) because the
16870Sstevel@tonic-gate 	 * current Zone implementation treats each link local address as
16880Sstevel@tonic-gate 	 * being unique per system node, i.e. they belong to global Zone.
16890Sstevel@tonic-gate 	 */
16900Sstevel@tonic-gate 	zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) ||
16910Sstevel@tonic-gate 	    IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst));
16920Sstevel@tonic-gate 
16930Sstevel@tonic-gate 	bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t));
16940Sstevel@tonic-gate 	bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t));
16950Sstevel@tonic-gate 
16960Sstevel@tonic-gate 	connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, ports)];
16970Sstevel@tonic-gate 
16980Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
16990Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
17000Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
17010Sstevel@tonic-gate 
17020Sstevel@tonic-gate 		/* We skip tcp_bound_if check here as this is loopback tcp */
17030Sstevel@tonic-gate 		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
17040Sstevel@tonic-gate 		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
17050Sstevel@tonic-gate 		    tconnp->conn_tcp->tcp_state >= TCPS_ESTABLISHED &&
17060Sstevel@tonic-gate 		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
17070Sstevel@tonic-gate 
17080Sstevel@tonic-gate 			ASSERT(tconnp != connp);
17090Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
17100Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
17110Sstevel@tonic-gate 			return (tconnp);
17120Sstevel@tonic-gate 		}
17130Sstevel@tonic-gate 	}
17140Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
17150Sstevel@tonic-gate 	return (NULL);
17160Sstevel@tonic-gate }
17170Sstevel@tonic-gate 
17180Sstevel@tonic-gate /*
17190Sstevel@tonic-gate  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
17200Sstevel@tonic-gate  * Returns with conn reference held. Caller must call CONN_DEC_REF.
17210Sstevel@tonic-gate  * Only checks for connected entries i.e. no INADDR_ANY checks.
17220Sstevel@tonic-gate  */
17230Sstevel@tonic-gate conn_t *
17240Sstevel@tonic-gate ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcph_t *tcph, int min_state)
17250Sstevel@tonic-gate {
17260Sstevel@tonic-gate 	uint32_t ports;
17270Sstevel@tonic-gate 	uint16_t *pports;
17280Sstevel@tonic-gate 	connf_t	*connfp;
17290Sstevel@tonic-gate 	conn_t	*tconnp;
17300Sstevel@tonic-gate 
17310Sstevel@tonic-gate 	pports = (uint16_t *)&ports;
17320Sstevel@tonic-gate 	bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t));
17330Sstevel@tonic-gate 	bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t));
17340Sstevel@tonic-gate 
17350Sstevel@tonic-gate 	connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, ports)];
17360Sstevel@tonic-gate 
17370Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
17380Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
17390Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
17400Sstevel@tonic-gate 
17410Sstevel@tonic-gate 		if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
17420Sstevel@tonic-gate 		    ipha->ipha_dst, ipha->ipha_src, ports) &&
17430Sstevel@tonic-gate 		    tconnp->conn_tcp->tcp_state >= min_state) {
17440Sstevel@tonic-gate 
17450Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
17460Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
17470Sstevel@tonic-gate 			return (tconnp);
17480Sstevel@tonic-gate 		}
17490Sstevel@tonic-gate 	}
17500Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
17510Sstevel@tonic-gate 	return (NULL);
17520Sstevel@tonic-gate }
17530Sstevel@tonic-gate 
17540Sstevel@tonic-gate /*
17550Sstevel@tonic-gate  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
17560Sstevel@tonic-gate  * Returns with conn reference held. Caller must call CONN_DEC_REF.
17570Sstevel@tonic-gate  * Only checks for connected entries i.e. no INADDR_ANY checks.
17580Sstevel@tonic-gate  * Match on ifindex in addition to addresses.
17590Sstevel@tonic-gate  */
17600Sstevel@tonic-gate conn_t *
17610Sstevel@tonic-gate ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state,
17620Sstevel@tonic-gate     uint_t ifindex)
17630Sstevel@tonic-gate {
17640Sstevel@tonic-gate 	tcp_t	*tcp;
17650Sstevel@tonic-gate 	uint32_t ports;
17660Sstevel@tonic-gate 	uint16_t *pports;
17670Sstevel@tonic-gate 	connf_t	*connfp;
17680Sstevel@tonic-gate 	conn_t	*tconnp;
17690Sstevel@tonic-gate 
17700Sstevel@tonic-gate 	pports = (uint16_t *)&ports;
17710Sstevel@tonic-gate 	pports[0] = tcpha->tha_fport;
17720Sstevel@tonic-gate 	pports[1] = tcpha->tha_lport;
17730Sstevel@tonic-gate 
17740Sstevel@tonic-gate 	connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, ports)];
17750Sstevel@tonic-gate 
17760Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
17770Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
17780Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
17790Sstevel@tonic-gate 
17800Sstevel@tonic-gate 		tcp = tconnp->conn_tcp;
17810Sstevel@tonic-gate 		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
17820Sstevel@tonic-gate 		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
17830Sstevel@tonic-gate 		    tcp->tcp_state >= min_state &&
17840Sstevel@tonic-gate 		    (tcp->tcp_bound_if == 0 ||
17850Sstevel@tonic-gate 		    tcp->tcp_bound_if == ifindex)) {
17860Sstevel@tonic-gate 
17870Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
17880Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
17890Sstevel@tonic-gate 			return (tconnp);
17900Sstevel@tonic-gate 		}
17910Sstevel@tonic-gate 	}
17920Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
17930Sstevel@tonic-gate 	return (NULL);
17940Sstevel@tonic-gate }
17950Sstevel@tonic-gate 
17960Sstevel@tonic-gate /*
17970Sstevel@tonic-gate  * To find a TCP listening connection matching the incoming segment.
17980Sstevel@tonic-gate  */
17990Sstevel@tonic-gate conn_t *
18000Sstevel@tonic-gate ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid)
18010Sstevel@tonic-gate {
18020Sstevel@tonic-gate 	connf_t		*bind_connfp;
18030Sstevel@tonic-gate 	conn_t		*connp;
18040Sstevel@tonic-gate 	tcp_t		*tcp;
18050Sstevel@tonic-gate 
18060Sstevel@tonic-gate 	/*
18070Sstevel@tonic-gate 	 * Avoid false matches for packets sent to an IP destination of
18080Sstevel@tonic-gate 	 * all zeros.
18090Sstevel@tonic-gate 	 */
18100Sstevel@tonic-gate 	if (laddr == 0)
18110Sstevel@tonic-gate 		return (NULL);
18120Sstevel@tonic-gate 
18130Sstevel@tonic-gate 	bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)];
18140Sstevel@tonic-gate 	mutex_enter(&bind_connfp->connf_lock);
18150Sstevel@tonic-gate 	for (connp = bind_connfp->connf_head; connp != NULL;
18160Sstevel@tonic-gate 	    connp = connp->conn_next) {
18170Sstevel@tonic-gate 		tcp = connp->conn_tcp;
18180Sstevel@tonic-gate 		if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) &&
18190Sstevel@tonic-gate 		    connp->conn_zoneid == zoneid &&
18200Sstevel@tonic-gate 		    (tcp->tcp_listener == NULL)) {
18210Sstevel@tonic-gate 			CONN_INC_REF(connp);
18220Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
18230Sstevel@tonic-gate 			return (connp);
18240Sstevel@tonic-gate 		}
18250Sstevel@tonic-gate 	}
18260Sstevel@tonic-gate 	mutex_exit(&bind_connfp->connf_lock);
18270Sstevel@tonic-gate 	return (NULL);
18280Sstevel@tonic-gate }
18290Sstevel@tonic-gate 
18300Sstevel@tonic-gate 
18310Sstevel@tonic-gate conn_t *
18320Sstevel@tonic-gate ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex,
18330Sstevel@tonic-gate     zoneid_t zoneid)
18340Sstevel@tonic-gate {
18350Sstevel@tonic-gate 	connf_t		*bind_connfp;
18360Sstevel@tonic-gate 	conn_t		*connp = NULL;
18370Sstevel@tonic-gate 	tcp_t		*tcp;
18380Sstevel@tonic-gate 
18390Sstevel@tonic-gate 	/*
18400Sstevel@tonic-gate 	 * Avoid false matches for packets sent to an IP destination of
18410Sstevel@tonic-gate 	 * all zeros.
18420Sstevel@tonic-gate 	 */
18430Sstevel@tonic-gate 	if (IN6_IS_ADDR_UNSPECIFIED(laddr))
18440Sstevel@tonic-gate 		return (NULL);
18450Sstevel@tonic-gate 
18460Sstevel@tonic-gate 
18470Sstevel@tonic-gate 	bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)];
18480Sstevel@tonic-gate 	mutex_enter(&bind_connfp->connf_lock);
18490Sstevel@tonic-gate 	for (connp = bind_connfp->connf_head; connp != NULL;
18500Sstevel@tonic-gate 	    connp = connp->conn_next) {
18510Sstevel@tonic-gate 		tcp = connp->conn_tcp;
18520Sstevel@tonic-gate 		if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) &&
18530Sstevel@tonic-gate 		    connp->conn_zoneid == zoneid &&
18540Sstevel@tonic-gate 		    (tcp->tcp_bound_if == 0 ||
18550Sstevel@tonic-gate 		    tcp->tcp_bound_if == ifindex) &&
18560Sstevel@tonic-gate 		    tcp->tcp_listener == NULL) {
18570Sstevel@tonic-gate 			CONN_INC_REF(connp);
18580Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
18590Sstevel@tonic-gate 			return (connp);
18600Sstevel@tonic-gate 		}
18610Sstevel@tonic-gate 	}
18620Sstevel@tonic-gate 	mutex_exit(&bind_connfp->connf_lock);
18630Sstevel@tonic-gate 	return (NULL);
18640Sstevel@tonic-gate }
18650Sstevel@tonic-gate 
18660Sstevel@tonic-gate #ifdef CONN_DEBUG
18670Sstevel@tonic-gate /*
18680Sstevel@tonic-gate  * Trace of the last NBUF refhold/refrele
18690Sstevel@tonic-gate  */
18700Sstevel@tonic-gate int
18710Sstevel@tonic-gate conn_trace_ref(conn_t *connp)
18720Sstevel@tonic-gate {
18730Sstevel@tonic-gate 	int	last;
18740Sstevel@tonic-gate 	conn_trace_t	*ctb;
18750Sstevel@tonic-gate 
18760Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connp->conn_lock));
18770Sstevel@tonic-gate 	last = connp->conn_trace_last;
18780Sstevel@tonic-gate 	last++;
18790Sstevel@tonic-gate 	if (last == CONN_TRACE_MAX)
18800Sstevel@tonic-gate 		last = 0;
18810Sstevel@tonic-gate 
18820Sstevel@tonic-gate 	ctb = &connp->conn_trace_buf[last];
18830Sstevel@tonic-gate 	ctb->ctb_depth = getpcstack(ctb->ctb_stack, IP_STACK_DEPTH);
18840Sstevel@tonic-gate 	connp->conn_trace_last = last;
18850Sstevel@tonic-gate 	return (1);
18860Sstevel@tonic-gate }
18870Sstevel@tonic-gate 
18880Sstevel@tonic-gate int
18890Sstevel@tonic-gate conn_untrace_ref(conn_t *connp)
18900Sstevel@tonic-gate {
18910Sstevel@tonic-gate 	int	last;
18920Sstevel@tonic-gate 	conn_trace_t	*ctb;
18930Sstevel@tonic-gate 
18940Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connp->conn_lock));
18950Sstevel@tonic-gate 	last = connp->conn_trace_last;
18960Sstevel@tonic-gate 	last++;
18970Sstevel@tonic-gate 	if (last == CONN_TRACE_MAX)
18980Sstevel@tonic-gate 		last = 0;
18990Sstevel@tonic-gate 
19000Sstevel@tonic-gate 	ctb = &connp->conn_trace_buf[last];
19010Sstevel@tonic-gate 	ctb->ctb_depth = getpcstack(ctb->ctb_stack, IP_STACK_DEPTH);
19020Sstevel@tonic-gate 	connp->conn_trace_last = last;
19030Sstevel@tonic-gate 	return (1);
19040Sstevel@tonic-gate }
19050Sstevel@tonic-gate #endif
1906