xref: /onnv-gate/usr/src/uts/common/inet/ip/ipclassifier.c (revision 11303:3c4e3958fa37)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51503Sericheng  * Common Development and Distribution License (the "License").
61503Sericheng  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
228485SPeter.Memishian@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate /*
270Sstevel@tonic-gate  * IP PACKET CLASSIFIER
280Sstevel@tonic-gate  *
290Sstevel@tonic-gate  * The IP packet classifier provides mapping between IP packets and persistent
300Sstevel@tonic-gate  * connection state for connection-oriented protocols. It also provides
310Sstevel@tonic-gate  * interface for managing connection states.
320Sstevel@tonic-gate  *
330Sstevel@tonic-gate  * The connection state is kept in conn_t data structure and contains, among
340Sstevel@tonic-gate  * other things:
350Sstevel@tonic-gate  *
360Sstevel@tonic-gate  *	o local/remote address and ports
370Sstevel@tonic-gate  *	o Transport protocol
380Sstevel@tonic-gate  *	o squeue for the connection (for TCP only)
390Sstevel@tonic-gate  *	o reference counter
400Sstevel@tonic-gate  *	o Connection state
410Sstevel@tonic-gate  *	o hash table linkage
420Sstevel@tonic-gate  *	o interface/ire information
430Sstevel@tonic-gate  *	o credentials
440Sstevel@tonic-gate  *	o ipsec policy
450Sstevel@tonic-gate  *	o send and receive functions.
460Sstevel@tonic-gate  *	o mutex lock.
470Sstevel@tonic-gate  *
480Sstevel@tonic-gate  * Connections use a reference counting scheme. They are freed when the
490Sstevel@tonic-gate  * reference counter drops to zero. A reference is incremented when connection
500Sstevel@tonic-gate  * is placed in a list or table, when incoming packet for the connection arrives
510Sstevel@tonic-gate  * and when connection is processed via squeue (squeue processing may be
520Sstevel@tonic-gate  * asynchronous and the reference protects the connection from being destroyed
530Sstevel@tonic-gate  * before its processing is finished).
540Sstevel@tonic-gate  *
5511042SErik.Nordmark@Sun.COM  * conn_recv is used to pass up packets to the ULP.
5611042SErik.Nordmark@Sun.COM  * For TCP conn_recv changes. It is tcp_input_listener_unbound initially for
5711042SErik.Nordmark@Sun.COM  * a listener, and changes to tcp_input_listener as the listener has picked a
5811042SErik.Nordmark@Sun.COM  * good squeue. For other cases it is set to tcp_input_data.
5911042SErik.Nordmark@Sun.COM  *
6011042SErik.Nordmark@Sun.COM  * conn_recvicmp is used to pass up ICMP errors to the ULP.
610Sstevel@tonic-gate  *
620Sstevel@tonic-gate  * Classifier uses several hash tables:
630Sstevel@tonic-gate  *
640Sstevel@tonic-gate  * 	ipcl_conn_fanout:	contains all TCP connections in CONNECTED state
650Sstevel@tonic-gate  *	ipcl_bind_fanout:	contains all connections in BOUND state
660Sstevel@tonic-gate  *	ipcl_proto_fanout:	IPv4 protocol fanout
670Sstevel@tonic-gate  *	ipcl_proto_fanout_v6:	IPv6 protocol fanout
680Sstevel@tonic-gate  *	ipcl_udp_fanout:	contains all UDP connections
6910616SSebastien.Roy@Sun.COM  *	ipcl_iptun_fanout:	contains all IP tunnel connections
700Sstevel@tonic-gate  *	ipcl_globalhash_fanout:	contains all connections
710Sstevel@tonic-gate  *
720Sstevel@tonic-gate  * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
730Sstevel@tonic-gate  * which need to view all existing connections.
740Sstevel@tonic-gate  *
750Sstevel@tonic-gate  * All tables are protected by per-bucket locks. When both per-bucket lock and
760Sstevel@tonic-gate  * connection lock need to be held, the per-bucket lock should be acquired
770Sstevel@tonic-gate  * first, followed by the connection lock.
780Sstevel@tonic-gate  *
790Sstevel@tonic-gate  * All functions doing search in one of these tables increment a reference
800Sstevel@tonic-gate  * counter on the connection found (if any). This reference should be dropped
810Sstevel@tonic-gate  * when the caller has finished processing the connection.
820Sstevel@tonic-gate  *
830Sstevel@tonic-gate  *
840Sstevel@tonic-gate  * INTERFACES:
850Sstevel@tonic-gate  * ===========
860Sstevel@tonic-gate  *
870Sstevel@tonic-gate  * Connection Lookup:
880Sstevel@tonic-gate  * ------------------
890Sstevel@tonic-gate  *
9011042SErik.Nordmark@Sun.COM  * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, ira, ip_stack)
9111042SErik.Nordmark@Sun.COM  * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, ira, ip_stack)
920Sstevel@tonic-gate  *
930Sstevel@tonic-gate  * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
940Sstevel@tonic-gate  * it can't find any associated connection. If the connection is found, its
950Sstevel@tonic-gate  * reference counter is incremented.
960Sstevel@tonic-gate  *
970Sstevel@tonic-gate  *	mp:	mblock, containing packet header. The full header should fit
980Sstevel@tonic-gate  *		into a single mblock. It should also contain at least full IP
990Sstevel@tonic-gate  *		and TCP or UDP header.
1000Sstevel@tonic-gate  *
1010Sstevel@tonic-gate  *	protocol: Either IPPROTO_TCP or IPPROTO_UDP.
1020Sstevel@tonic-gate  *
1030Sstevel@tonic-gate  *	hdr_len: The size of IP header. It is used to find TCP or UDP header in
1040Sstevel@tonic-gate  *		 the packet.
1050Sstevel@tonic-gate  *
10611042SErik.Nordmark@Sun.COM  * 	ira->ira_zoneid: The zone in which the returned connection must be; the
10711042SErik.Nordmark@Sun.COM  *		zoneid corresponding to the ire_zoneid on the IRE located for
10811042SErik.Nordmark@Sun.COM  *		the packet's destination address.
10911042SErik.Nordmark@Sun.COM  *
11011042SErik.Nordmark@Sun.COM  *	ira->ira_flags: Contains the IRAF_TX_MAC_EXEMPTABLE and
11111042SErik.Nordmark@Sun.COM  *		IRAF_TX_SHARED_ADDR flags
1120Sstevel@tonic-gate  *
1130Sstevel@tonic-gate  *	For TCP connections, the lookup order is as follows:
1140Sstevel@tonic-gate  *		5-tuple {src, dst, protocol, local port, remote port}
1150Sstevel@tonic-gate  *			lookup in ipcl_conn_fanout table.
1160Sstevel@tonic-gate  *		3-tuple {dst, remote port, protocol} lookup in
1170Sstevel@tonic-gate  *			ipcl_bind_fanout table.
1180Sstevel@tonic-gate  *
1190Sstevel@tonic-gate  *	For UDP connections, a 5-tuple {src, dst, protocol, local port,
1200Sstevel@tonic-gate  *	remote port} lookup is done on ipcl_udp_fanout. Note that,
 *	these interfaces do not handle cases where a packet belongs
1220Sstevel@tonic-gate  *	to multiple UDP clients, which is handled in IP itself.
1230Sstevel@tonic-gate  *
1241676Sjpk  * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
1251676Sjpk  * determine which actual zone gets the segment.  This is used only in a
1261676Sjpk  * labeled environment.  The matching rules are:
1271676Sjpk  *
1281676Sjpk  *	- If it's not a multilevel port, then the label on the packet selects
1291676Sjpk  *	  the zone.  Unlabeled packets are delivered to the global zone.
1301676Sjpk  *
1311676Sjpk  *	- If it's a multilevel port, then only the zone registered to receive
1321676Sjpk  *	  packets on that port matches.
1331676Sjpk  *
1341676Sjpk  * Also, in a labeled environment, packet labels need to be checked.  For fully
1351676Sjpk  * bound TCP connections, we can assume that the packet label was checked
1361676Sjpk  * during connection establishment, and doesn't need to be checked on each
1371676Sjpk  * packet.  For others, though, we need to check for strict equality or, for
1381676Sjpk  * multilevel ports, membership in the range or set.  This part currently does
1391676Sjpk  * a tnrh lookup on each packet, but could be optimized to use cached results
1401676Sjpk  * if that were necessary.  (SCTP doesn't come through here, but if it did,
1411676Sjpk  * we would apply the same rules as TCP.)
1421676Sjpk  *
1431676Sjpk  * An implication of the above is that fully-bound TCP sockets must always use
1441676Sjpk  * distinct 4-tuples; they can't be discriminated by label alone.
1451676Sjpk  *
1461676Sjpk  * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
1471676Sjpk  * as there's no connection set-up handshake and no shared state.
1481676Sjpk  *
1491676Sjpk  * Labels on looped-back packets within a single zone do not need to be
1501676Sjpk  * checked, as all processes in the same zone have the same label.
1511676Sjpk  *
1521676Sjpk  * Finally, for unlabeled packets received by a labeled system, special rules
1531676Sjpk  * apply.  We consider only the MLP if there is one.  Otherwise, we prefer a
1541676Sjpk  * socket in the zone whose label matches the default label of the sender, if
1551676Sjpk  * any.  In any event, the receiving socket must have SO_MAC_EXEMPT set and the
1561676Sjpk  * receiver's label must dominate the sender's default label.
1571676Sjpk  *
15811042SErik.Nordmark@Sun.COM  * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcpha_t *, int, ip_stack);
1593448Sdh155122  * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
1603448Sdh155122  *					 ip_stack);
1610Sstevel@tonic-gate  *
 *	Lookup routine to find an exact match for {src, dst, local port,
 *	remote port} for TCP connections in ipcl_conn_fanout. The address and
1640Sstevel@tonic-gate  *	ports are read from the IP and TCP header respectively.
1650Sstevel@tonic-gate  *
1663448Sdh155122  * conn_t	*ipcl_lookup_listener_v4(lport, laddr, protocol,
1673448Sdh155122  *					 zoneid, ip_stack);
1683448Sdh155122  * conn_t	*ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
1693448Sdh155122  *					 zoneid, ip_stack);
1700Sstevel@tonic-gate  *
1710Sstevel@tonic-gate  * 	Lookup routine to find a listener with the tuple {lport, laddr,
1720Sstevel@tonic-gate  * 	protocol} in the ipcl_bind_fanout table. For IPv6, an additional
1730Sstevel@tonic-gate  * 	parameter interface index is also compared.
1740Sstevel@tonic-gate  *
1753448Sdh155122  * void ipcl_walk(func, arg, ip_stack)
1760Sstevel@tonic-gate  *
1770Sstevel@tonic-gate  * 	Apply 'func' to every connection available. The 'func' is called as
1780Sstevel@tonic-gate  *	(*func)(connp, arg). The walk is non-atomic so connections may be
1790Sstevel@tonic-gate  *	created and destroyed during the walk. The CONN_CONDEMNED and
1800Sstevel@tonic-gate  *	CONN_INCIPIENT flags ensure that connections which are newly created
1810Sstevel@tonic-gate  *	or being destroyed are not selected by the walker.
1820Sstevel@tonic-gate  *
1830Sstevel@tonic-gate  * Table Updates
1840Sstevel@tonic-gate  * -------------
1850Sstevel@tonic-gate  *
18611042SErik.Nordmark@Sun.COM  * int ipcl_conn_insert(connp);
18711042SErik.Nordmark@Sun.COM  * int ipcl_conn_insert_v4(connp);
18811042SErik.Nordmark@Sun.COM  * int ipcl_conn_insert_v6(connp);
1890Sstevel@tonic-gate  *
1900Sstevel@tonic-gate  *	Insert 'connp' in the ipcl_conn_fanout.
 *	Arguments :
1920Sstevel@tonic-gate  *		connp		conn_t to be inserted
1930Sstevel@tonic-gate  *
1940Sstevel@tonic-gate  *	Return value :
1950Sstevel@tonic-gate  *		0		if connp was inserted
1960Sstevel@tonic-gate  *		EADDRINUSE	if the connection with the same tuple
1970Sstevel@tonic-gate  *				already exists.
1980Sstevel@tonic-gate  *
19911042SErik.Nordmark@Sun.COM  * int ipcl_bind_insert(connp);
20011042SErik.Nordmark@Sun.COM  * int ipcl_bind_insert_v4(connp);
20111042SErik.Nordmark@Sun.COM  * int ipcl_bind_insert_v6(connp);
2020Sstevel@tonic-gate  *
2030Sstevel@tonic-gate  * 	Insert 'connp' in ipcl_bind_fanout.
 * 	Arguments :
2050Sstevel@tonic-gate  * 		connp		conn_t to be inserted
2060Sstevel@tonic-gate  *
2070Sstevel@tonic-gate  *
2080Sstevel@tonic-gate  * void ipcl_hash_remove(connp);
2090Sstevel@tonic-gate  *
2100Sstevel@tonic-gate  * 	Removes the 'connp' from the connection fanout table.
2110Sstevel@tonic-gate  *
2120Sstevel@tonic-gate  * Connection Creation/Destruction
2130Sstevel@tonic-gate  * -------------------------------
2140Sstevel@tonic-gate  *
2153448Sdh155122  * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
2160Sstevel@tonic-gate  *
2170Sstevel@tonic-gate  * 	Creates a new conn based on the type flag, inserts it into
2180Sstevel@tonic-gate  * 	globalhash table.
2190Sstevel@tonic-gate  *
2200Sstevel@tonic-gate  *	type:	This flag determines the type of conn_t which needs to be
2215240Snordmark  *		created i.e., which kmem_cache it comes from.
2220Sstevel@tonic-gate  *		IPCL_TCPCONN	indicates a TCP connection
2235240Snordmark  *		IPCL_SCTPCONN	indicates a SCTP connection
2245240Snordmark  *		IPCL_UDPCONN	indicates a UDP conn_t.
2255240Snordmark  *		IPCL_RAWIPCONN	indicates a RAWIP/ICMP conn_t.
2265240Snordmark  *		IPCL_RTSCONN	indicates a RTS conn_t.
2275240Snordmark  *		IPCL_IPCCONN	indicates all other connections.
2280Sstevel@tonic-gate  *
2290Sstevel@tonic-gate  * void ipcl_conn_destroy(connp)
2300Sstevel@tonic-gate  *
2310Sstevel@tonic-gate  * 	Destroys the connection state, removes it from the global
2320Sstevel@tonic-gate  * 	connection hash table and frees its memory.
2330Sstevel@tonic-gate  */
2340Sstevel@tonic-gate 
2350Sstevel@tonic-gate #include <sys/types.h>
2360Sstevel@tonic-gate #include <sys/stream.h>
2370Sstevel@tonic-gate #include <sys/stropts.h>
2380Sstevel@tonic-gate #include <sys/sysmacros.h>
2390Sstevel@tonic-gate #include <sys/strsubr.h>
2400Sstevel@tonic-gate #include <sys/strsun.h>
2410Sstevel@tonic-gate #define	_SUN_TPI_VERSION 2
2420Sstevel@tonic-gate #include <sys/ddi.h>
2430Sstevel@tonic-gate #include <sys/cmn_err.h>
2440Sstevel@tonic-gate #include <sys/debug.h>
2450Sstevel@tonic-gate 
2460Sstevel@tonic-gate #include <sys/systm.h>
2470Sstevel@tonic-gate #include <sys/param.h>
2480Sstevel@tonic-gate #include <sys/kmem.h>
2490Sstevel@tonic-gate #include <sys/isa_defs.h>
2500Sstevel@tonic-gate #include <inet/common.h>
2510Sstevel@tonic-gate #include <netinet/ip6.h>
2520Sstevel@tonic-gate #include <netinet/icmp6.h>
2530Sstevel@tonic-gate 
2540Sstevel@tonic-gate #include <inet/ip.h>
25511042SErik.Nordmark@Sun.COM #include <inet/ip_if.h>
25611042SErik.Nordmark@Sun.COM #include <inet/ip_ire.h>
2570Sstevel@tonic-gate #include <inet/ip6.h>
2580Sstevel@tonic-gate #include <inet/ip_ndp.h>
2598348SEric.Yu@Sun.COM #include <inet/ip_impl.h>
260741Smasputra #include <inet/udp_impl.h>
2610Sstevel@tonic-gate #include <inet/sctp_ip.h>
2623448Sdh155122 #include <inet/sctp/sctp_impl.h>
2635240Snordmark #include <inet/rawip_impl.h>
2645240Snordmark #include <inet/rts_impl.h>
26510616SSebastien.Roy@Sun.COM #include <inet/iptun/iptun_impl.h>
2660Sstevel@tonic-gate 
2670Sstevel@tonic-gate #include <sys/cpuvar.h>
2680Sstevel@tonic-gate 
2690Sstevel@tonic-gate #include <inet/ipclassifier.h>
2708348SEric.Yu@Sun.COM #include <inet/tcp.h>
2710Sstevel@tonic-gate #include <inet/ipsec_impl.h>
2720Sstevel@tonic-gate 
2731676Sjpk #include <sys/tsol/tnet.h>
2748348SEric.Yu@Sun.COM #include <sys/sockio.h>
2751676Sjpk 
2763448Sdh155122 /* Old value for compatibility. Setable in /etc/system */
2770Sstevel@tonic-gate uint_t tcp_conn_hash_size = 0;
2780Sstevel@tonic-gate 
2793448Sdh155122 /* New value. Zero means choose automatically.  Setable in /etc/system */
2800Sstevel@tonic-gate uint_t ipcl_conn_hash_size = 0;
2810Sstevel@tonic-gate uint_t ipcl_conn_hash_memfactor = 8192;
2820Sstevel@tonic-gate uint_t ipcl_conn_hash_maxsize = 82500;
2830Sstevel@tonic-gate 
2840Sstevel@tonic-gate /* bind/udp fanout table size */
2850Sstevel@tonic-gate uint_t ipcl_bind_fanout_size = 512;
2861503Sericheng uint_t ipcl_udp_fanout_size = 16384;
2870Sstevel@tonic-gate 
2880Sstevel@tonic-gate /* Raw socket fanout size.  Must be a power of 2. */
2890Sstevel@tonic-gate uint_t ipcl_raw_fanout_size = 256;
2900Sstevel@tonic-gate 
2910Sstevel@tonic-gate /*
29210616SSebastien.Roy@Sun.COM  * The IPCL_IPTUN_HASH() function works best with a prime table size.  We
29310616SSebastien.Roy@Sun.COM  * expect that most large deployments would have hundreds of tunnels, and
29410616SSebastien.Roy@Sun.COM  * thousands in the extreme case.
29510616SSebastien.Roy@Sun.COM  */
29610616SSebastien.Roy@Sun.COM uint_t ipcl_iptun_fanout_size = 6143;
29710616SSebastien.Roy@Sun.COM 
/*
 * Table of primes useful as hash table sizes, indexed by N for N of 0-28.
 * Entry N is the nearest prime <= 2^N - 2^(N-2).  Entries 0-2 are zero,
 * as is the final entry, which follows the N = 28 value.
 */
#define	P2Ps() {							\
		0, 0, 0, 5, 11,						\
		23, 47, 89, 191, 383,					\
		761, 1531, 3067, 6143, 12281,				\
		24571, 49139, 98299, 196597, 393209,			\
		786431, 1572853, 3145721, 6291449, 12582893,		\
		25165813, 50331599, 100663291, 201326557,		\
		0 }
3070Sstevel@tonic-gate 
3080Sstevel@tonic-gate /*
3095240Snordmark  * wrapper structure to ensure that conn and what follows it (tcp_t, etc)
3105240Snordmark  * are aligned on cache lines.
3110Sstevel@tonic-gate  */
3125240Snordmark typedef union itc_s {
3135240Snordmark 	conn_t	itc_conn;
3145240Snordmark 	char	itcu_filler[CACHE_ALIGN(conn_s)];
3150Sstevel@tonic-gate } itc_t;
3160Sstevel@tonic-gate 
3175240Snordmark struct kmem_cache  *tcp_conn_cache;
3185240Snordmark struct kmem_cache  *ip_conn_cache;
3190Sstevel@tonic-gate extern struct kmem_cache  *sctp_conn_cache;
3200Sstevel@tonic-gate extern struct kmem_cache  *tcp_sack_info_cache;
3215240Snordmark struct kmem_cache  *udp_conn_cache;
3225240Snordmark struct kmem_cache  *rawip_conn_cache;
3235240Snordmark struct kmem_cache  *rts_conn_cache;
3240Sstevel@tonic-gate 
3250Sstevel@tonic-gate extern void	tcp_timermp_free(tcp_t *);
3260Sstevel@tonic-gate extern mblk_t	*tcp_timermp_alloc(int);
3270Sstevel@tonic-gate 
3285240Snordmark static int	ip_conn_constructor(void *, void *, int);
3295240Snordmark static void	ip_conn_destructor(void *, void *);
3305240Snordmark 
3315240Snordmark static int	tcp_conn_constructor(void *, void *, int);
3325240Snordmark static void	tcp_conn_destructor(void *, void *);
3335240Snordmark 
3345240Snordmark static int	udp_conn_constructor(void *, void *, int);
3355240Snordmark static void	udp_conn_destructor(void *, void *);
3365240Snordmark 
3375240Snordmark static int	rawip_conn_constructor(void *, void *, int);
3385240Snordmark static void	rawip_conn_destructor(void *, void *);
3395240Snordmark 
3405240Snordmark static int	rts_conn_constructor(void *, void *, int);
3415240Snordmark static void	rts_conn_destructor(void *, void *);
3420Sstevel@tonic-gate 
3430Sstevel@tonic-gate /*
3443448Sdh155122  * Global (for all stack instances) init routine
3450Sstevel@tonic-gate  */
3460Sstevel@tonic-gate void
3473448Sdh155122 ipcl_g_init(void)
3480Sstevel@tonic-gate {
3495240Snordmark 	ip_conn_cache = kmem_cache_create("ip_conn_cache",
3500Sstevel@tonic-gate 	    sizeof (conn_t), CACHE_ALIGN_SIZE,
3515240Snordmark 	    ip_conn_constructor, ip_conn_destructor,
3525240Snordmark 	    NULL, NULL, NULL, 0);
3535240Snordmark 
3545240Snordmark 	tcp_conn_cache = kmem_cache_create("tcp_conn_cache",
3555240Snordmark 	    sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE,
3565240Snordmark 	    tcp_conn_constructor, tcp_conn_destructor,
357*11303SKacheong.Poon@Sun.COM 	    tcp_conn_reclaim, NULL, NULL, 0);
3580Sstevel@tonic-gate 
3595240Snordmark 	udp_conn_cache = kmem_cache_create("udp_conn_cache",
3605240Snordmark 	    sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE,
3615240Snordmark 	    udp_conn_constructor, udp_conn_destructor,
3625240Snordmark 	    NULL, NULL, NULL, 0);
3635240Snordmark 
3645240Snordmark 	rawip_conn_cache = kmem_cache_create("rawip_conn_cache",
3655240Snordmark 	    sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE,
3665240Snordmark 	    rawip_conn_constructor, rawip_conn_destructor,
3675240Snordmark 	    NULL, NULL, NULL, 0);
3685240Snordmark 
3695240Snordmark 	rts_conn_cache = kmem_cache_create("rts_conn_cache",
3705240Snordmark 	    sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE,
3715240Snordmark 	    rts_conn_constructor, rts_conn_destructor,
3720Sstevel@tonic-gate 	    NULL, NULL, NULL, 0);
3733448Sdh155122 }
3743448Sdh155122 
3753448Sdh155122 /*
3763448Sdh155122  * ipclassifier intialization routine, sets up hash tables.
3773448Sdh155122  */
3783448Sdh155122 void
3793448Sdh155122 ipcl_init(ip_stack_t *ipst)
3803448Sdh155122 {
3813448Sdh155122 	int i;
3823448Sdh155122 	int sizes[] = P2Ps();
3830Sstevel@tonic-gate 
3840Sstevel@tonic-gate 	/*
3853448Sdh155122 	 * Calculate size of conn fanout table from /etc/system settings
3860Sstevel@tonic-gate 	 */
3870Sstevel@tonic-gate 	if (ipcl_conn_hash_size != 0) {
3883448Sdh155122 		ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size;
3890Sstevel@tonic-gate 	} else if (tcp_conn_hash_size != 0) {
3903448Sdh155122 		ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size;
3910Sstevel@tonic-gate 	} else {
3920Sstevel@tonic-gate 		extern pgcnt_t freemem;
3930Sstevel@tonic-gate 
3943448Sdh155122 		ipst->ips_ipcl_conn_fanout_size =
3950Sstevel@tonic-gate 		    (freemem * PAGESIZE) / ipcl_conn_hash_memfactor;
3960Sstevel@tonic-gate 
3973448Sdh155122 		if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) {
3983448Sdh155122 			ipst->ips_ipcl_conn_fanout_size =
3993448Sdh155122 			    ipcl_conn_hash_maxsize;
4003448Sdh155122 		}
4010Sstevel@tonic-gate 	}
4020Sstevel@tonic-gate 
4030Sstevel@tonic-gate 	for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) {
4043448Sdh155122 		if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) {
4050Sstevel@tonic-gate 			break;
4060Sstevel@tonic-gate 		}
4070Sstevel@tonic-gate 	}
4083448Sdh155122 	if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) {
4090Sstevel@tonic-gate 		/* Out of range, use the 2^16 value */
4103448Sdh155122 		ipst->ips_ipcl_conn_fanout_size = sizes[16];
4110Sstevel@tonic-gate 	}
4123448Sdh155122 
4133448Sdh155122 	/* Take values from /etc/system */
4143448Sdh155122 	ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size;
4153448Sdh155122 	ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size;
4163448Sdh155122 	ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size;
41710616SSebastien.Roy@Sun.COM 	ipst->ips_ipcl_iptun_fanout_size = ipcl_iptun_fanout_size;
4180Sstevel@tonic-gate 
4193448Sdh155122 	ASSERT(ipst->ips_ipcl_conn_fanout == NULL);
4203448Sdh155122 
4213448Sdh155122 	ipst->ips_ipcl_conn_fanout = kmem_zalloc(
4223448Sdh155122 	    ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP);
4233448Sdh155122 
4243448Sdh155122 	for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
4253448Sdh155122 		mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL,
4260Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4270Sstevel@tonic-gate 	}
4280Sstevel@tonic-gate 
4293448Sdh155122 	ipst->ips_ipcl_bind_fanout = kmem_zalloc(
4303448Sdh155122 	    ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP);
4310Sstevel@tonic-gate 
4323448Sdh155122 	for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
4333448Sdh155122 		mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL,
4340Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4350Sstevel@tonic-gate 	}
4360Sstevel@tonic-gate 
43711042SErik.Nordmark@Sun.COM 	ipst->ips_ipcl_proto_fanout_v4 = kmem_zalloc(IPPROTO_MAX *
4383448Sdh155122 	    sizeof (connf_t), KM_SLEEP);
4393448Sdh155122 	for (i = 0; i < IPPROTO_MAX; i++) {
44011042SErik.Nordmark@Sun.COM 		mutex_init(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock, NULL,
4410Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4420Sstevel@tonic-gate 	}
4433448Sdh155122 
4443448Sdh155122 	ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX *
4453448Sdh155122 	    sizeof (connf_t), KM_SLEEP);
4463448Sdh155122 	for (i = 0; i < IPPROTO_MAX; i++) {
4473448Sdh155122 		mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL,
4480Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4490Sstevel@tonic-gate 	}
4500Sstevel@tonic-gate 
4513448Sdh155122 	ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP);
4523448Sdh155122 	mutex_init(&ipst->ips_rts_clients->connf_lock,
4533448Sdh155122 	    NULL, MUTEX_DEFAULT, NULL);
4540Sstevel@tonic-gate 
4553448Sdh155122 	ipst->ips_ipcl_udp_fanout = kmem_zalloc(
4563448Sdh155122 	    ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP);
4573448Sdh155122 	for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
4583448Sdh155122 		mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL,
4590Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4600Sstevel@tonic-gate 	}
4610Sstevel@tonic-gate 
46210616SSebastien.Roy@Sun.COM 	ipst->ips_ipcl_iptun_fanout = kmem_zalloc(
46310616SSebastien.Roy@Sun.COM 	    ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t), KM_SLEEP);
46410616SSebastien.Roy@Sun.COM 	for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
46510616SSebastien.Roy@Sun.COM 		mutex_init(&ipst->ips_ipcl_iptun_fanout[i].connf_lock, NULL,
46610616SSebastien.Roy@Sun.COM 		    MUTEX_DEFAULT, NULL);
46710616SSebastien.Roy@Sun.COM 	}
46810616SSebastien.Roy@Sun.COM 
4693448Sdh155122 	ipst->ips_ipcl_raw_fanout = kmem_zalloc(
4703448Sdh155122 	    ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP);
4713448Sdh155122 	for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
4723448Sdh155122 		mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL,
4730Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4740Sstevel@tonic-gate 	}
4750Sstevel@tonic-gate 
4763448Sdh155122 	ipst->ips_ipcl_globalhash_fanout = kmem_zalloc(
4773448Sdh155122 	    sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP);
4780Sstevel@tonic-gate 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
4793448Sdh155122 		mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock,
4803448Sdh155122 		    NULL, MUTEX_DEFAULT, NULL);
4810Sstevel@tonic-gate 	}
4820Sstevel@tonic-gate }
4830Sstevel@tonic-gate 
4840Sstevel@tonic-gate void
4853448Sdh155122 ipcl_g_destroy(void)
4860Sstevel@tonic-gate {
4875240Snordmark 	kmem_cache_destroy(ip_conn_cache);
4885240Snordmark 	kmem_cache_destroy(tcp_conn_cache);
4895240Snordmark 	kmem_cache_destroy(udp_conn_cache);
4905240Snordmark 	kmem_cache_destroy(rawip_conn_cache);
4915240Snordmark 	kmem_cache_destroy(rts_conn_cache);
4923448Sdh155122 }
4933448Sdh155122 
4943448Sdh155122 /*
4953448Sdh155122  * All user-level and kernel use of the stack must be gone
4963448Sdh155122  * by now.
4973448Sdh155122  */
4983448Sdh155122 void
4993448Sdh155122 ipcl_destroy(ip_stack_t *ipst)
5003448Sdh155122 {
5013448Sdh155122 	int i;
5023448Sdh155122 
5033448Sdh155122 	for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
5043448Sdh155122 		ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL);
5053448Sdh155122 		mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock);
5063448Sdh155122 	}
5073448Sdh155122 	kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size *
5083448Sdh155122 	    sizeof (connf_t));
5093448Sdh155122 	ipst->ips_ipcl_conn_fanout = NULL;
5103448Sdh155122 
5113448Sdh155122 	for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
5123448Sdh155122 		ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL);
5133448Sdh155122 		mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock);
5143448Sdh155122 	}
5153448Sdh155122 	kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size *
5163448Sdh155122 	    sizeof (connf_t));
5173448Sdh155122 	ipst->ips_ipcl_bind_fanout = NULL;
5183448Sdh155122 
5193448Sdh155122 	for (i = 0; i < IPPROTO_MAX; i++) {
52011042SErik.Nordmark@Sun.COM 		ASSERT(ipst->ips_ipcl_proto_fanout_v4[i].connf_head == NULL);
52111042SErik.Nordmark@Sun.COM 		mutex_destroy(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock);
5223448Sdh155122 	}
52311042SErik.Nordmark@Sun.COM 	kmem_free(ipst->ips_ipcl_proto_fanout_v4,
52411042SErik.Nordmark@Sun.COM 	    IPPROTO_MAX * sizeof (connf_t));
52511042SErik.Nordmark@Sun.COM 	ipst->ips_ipcl_proto_fanout_v4 = NULL;
5260Sstevel@tonic-gate 
5273448Sdh155122 	for (i = 0; i < IPPROTO_MAX; i++) {
5283448Sdh155122 		ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL);
5293448Sdh155122 		mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock);
5303448Sdh155122 	}
5313448Sdh155122 	kmem_free(ipst->ips_ipcl_proto_fanout_v6,
5323448Sdh155122 	    IPPROTO_MAX * sizeof (connf_t));
5333448Sdh155122 	ipst->ips_ipcl_proto_fanout_v6 = NULL;
5343448Sdh155122 
5353448Sdh155122 	for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
5363448Sdh155122 		ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL);
5373448Sdh155122 		mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock);
5383448Sdh155122 	}
5393448Sdh155122 	kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size *
5403448Sdh155122 	    sizeof (connf_t));
5413448Sdh155122 	ipst->ips_ipcl_udp_fanout = NULL;
5420Sstevel@tonic-gate 
54310616SSebastien.Roy@Sun.COM 	for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
54410616SSebastien.Roy@Sun.COM 		ASSERT(ipst->ips_ipcl_iptun_fanout[i].connf_head == NULL);
54510616SSebastien.Roy@Sun.COM 		mutex_destroy(&ipst->ips_ipcl_iptun_fanout[i].connf_lock);
54610616SSebastien.Roy@Sun.COM 	}
54710616SSebastien.Roy@Sun.COM 	kmem_free(ipst->ips_ipcl_iptun_fanout,
54810616SSebastien.Roy@Sun.COM 	    ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t));
54910616SSebastien.Roy@Sun.COM 	ipst->ips_ipcl_iptun_fanout = NULL;
55010616SSebastien.Roy@Sun.COM 
5513448Sdh155122 	for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
5523448Sdh155122 		ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL);
5533448Sdh155122 		mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock);
5543448Sdh155122 	}
5553448Sdh155122 	kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size *
5563448Sdh155122 	    sizeof (connf_t));
5573448Sdh155122 	ipst->ips_ipcl_raw_fanout = NULL;
5580Sstevel@tonic-gate 
5593448Sdh155122 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
5603448Sdh155122 		ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL);
5613448Sdh155122 		mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
5623448Sdh155122 	}
5633448Sdh155122 	kmem_free(ipst->ips_ipcl_globalhash_fanout,
5643448Sdh155122 	    sizeof (connf_t) * CONN_G_HASH_SIZE);
5653448Sdh155122 	ipst->ips_ipcl_globalhash_fanout = NULL;
5660Sstevel@tonic-gate 
5673448Sdh155122 	ASSERT(ipst->ips_rts_clients->connf_head == NULL);
5683448Sdh155122 	mutex_destroy(&ipst->ips_rts_clients->connf_lock);
5693448Sdh155122 	kmem_free(ipst->ips_rts_clients, sizeof (connf_t));
5703448Sdh155122 	ipst->ips_rts_clients = NULL;
5710Sstevel@tonic-gate }
5720Sstevel@tonic-gate 
5730Sstevel@tonic-gate /*
5740Sstevel@tonic-gate  * conn creation routine. initialize the conn, sets the reference
5750Sstevel@tonic-gate  * and inserts it in the global hash table.
5760Sstevel@tonic-gate  */
5770Sstevel@tonic-gate conn_t *
5783448Sdh155122 ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns)
5790Sstevel@tonic-gate {
5800Sstevel@tonic-gate 	conn_t	*connp;
5815240Snordmark 	struct kmem_cache *conn_cache;
5820Sstevel@tonic-gate 
5830Sstevel@tonic-gate 	switch (type) {
5840Sstevel@tonic-gate 	case IPCL_SCTPCONN:
5850Sstevel@tonic-gate 		if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL)
5860Sstevel@tonic-gate 			return (NULL);
5874691Skcpoon 		sctp_conn_init(connp);
5883448Sdh155122 		netstack_hold(ns);
5893448Sdh155122 		connp->conn_netstack = ns;
59011042SErik.Nordmark@Sun.COM 		connp->conn_ixa->ixa_ipst = ns->netstack_ip;
59111042SErik.Nordmark@Sun.COM 		ipcl_globalhash_insert(connp);
5925240Snordmark 		return (connp);
5935240Snordmark 
5945240Snordmark 	case IPCL_TCPCONN:
5955240Snordmark 		conn_cache = tcp_conn_cache;
5960Sstevel@tonic-gate 		break;
5975240Snordmark 
5985240Snordmark 	case IPCL_UDPCONN:
5995240Snordmark 		conn_cache = udp_conn_cache;
6005240Snordmark 		break;
6015240Snordmark 
6025240Snordmark 	case IPCL_RAWIPCONN:
6035240Snordmark 		conn_cache = rawip_conn_cache;
6045240Snordmark 		break;
6055240Snordmark 
6065240Snordmark 	case IPCL_RTSCONN:
6075240Snordmark 		conn_cache = rts_conn_cache;
6085240Snordmark 		break;
6095240Snordmark 
6100Sstevel@tonic-gate 	case IPCL_IPCCONN:
6115240Snordmark 		conn_cache = ip_conn_cache;
6120Sstevel@tonic-gate 		break;
6135240Snordmark 
614741Smasputra 	default:
615741Smasputra 		connp = NULL;
616741Smasputra 		ASSERT(0);
6170Sstevel@tonic-gate 	}
6180Sstevel@tonic-gate 
6195240Snordmark 	if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL)
6205240Snordmark 		return (NULL);
6215240Snordmark 
6225240Snordmark 	connp->conn_ref = 1;
6235240Snordmark 	netstack_hold(ns);
6245240Snordmark 	connp->conn_netstack = ns;
62511042SErik.Nordmark@Sun.COM 	connp->conn_ixa->ixa_ipst = ns->netstack_ip;
6265240Snordmark 	ipcl_globalhash_insert(connp);
6270Sstevel@tonic-gate 	return (connp);
6280Sstevel@tonic-gate }
6290Sstevel@tonic-gate 
6300Sstevel@tonic-gate void
6310Sstevel@tonic-gate ipcl_conn_destroy(conn_t *connp)
6320Sstevel@tonic-gate {
6330Sstevel@tonic-gate 	mblk_t	*mp;
6343448Sdh155122 	netstack_t	*ns = connp->conn_netstack;
6350Sstevel@tonic-gate 
6360Sstevel@tonic-gate 	ASSERT(!MUTEX_HELD(&connp->conn_lock));
6370Sstevel@tonic-gate 	ASSERT(connp->conn_ref == 0);
6380Sstevel@tonic-gate 
6397502Saruna@cs.umn.edu 	DTRACE_PROBE1(conn__destroy, conn_t *, connp);
6407502Saruna@cs.umn.edu 
6411676Sjpk 	if (connp->conn_cred != NULL) {
6421676Sjpk 		crfree(connp->conn_cred);
6431676Sjpk 		connp->conn_cred = NULL;
6441676Sjpk 	}
6451676Sjpk 
64611042SErik.Nordmark@Sun.COM 	if (connp->conn_ht_iphc != NULL) {
64711042SErik.Nordmark@Sun.COM 		kmem_free(connp->conn_ht_iphc, connp->conn_ht_iphc_allocated);
64811042SErik.Nordmark@Sun.COM 		connp->conn_ht_iphc = NULL;
64911042SErik.Nordmark@Sun.COM 		connp->conn_ht_iphc_allocated = 0;
65011042SErik.Nordmark@Sun.COM 		connp->conn_ht_iphc_len = 0;
65111042SErik.Nordmark@Sun.COM 		connp->conn_ht_ulp = NULL;
65211042SErik.Nordmark@Sun.COM 		connp->conn_ht_ulp_len = 0;
65311042SErik.Nordmark@Sun.COM 	}
65411042SErik.Nordmark@Sun.COM 	ip_pkt_free(&connp->conn_xmit_ipp);
65511042SErik.Nordmark@Sun.COM 
6560Sstevel@tonic-gate 	ipcl_globalhash_remove(connp);
6570Sstevel@tonic-gate 
65811042SErik.Nordmark@Sun.COM 	if (connp->conn_latch != NULL) {
65911042SErik.Nordmark@Sun.COM 		IPLATCH_REFRELE(connp->conn_latch);
66011042SErik.Nordmark@Sun.COM 		connp->conn_latch = NULL;
66111042SErik.Nordmark@Sun.COM 	}
66211042SErik.Nordmark@Sun.COM 	if (connp->conn_latch_in_policy != NULL) {
66311042SErik.Nordmark@Sun.COM 		IPPOL_REFRELE(connp->conn_latch_in_policy);
66411042SErik.Nordmark@Sun.COM 		connp->conn_latch_in_policy = NULL;
66511042SErik.Nordmark@Sun.COM 	}
66611042SErik.Nordmark@Sun.COM 	if (connp->conn_latch_in_action != NULL) {
66711042SErik.Nordmark@Sun.COM 		IPACT_REFRELE(connp->conn_latch_in_action);
66811042SErik.Nordmark@Sun.COM 		connp->conn_latch_in_action = NULL;
66911042SErik.Nordmark@Sun.COM 	}
67011042SErik.Nordmark@Sun.COM 	if (connp->conn_policy != NULL) {
67111042SErik.Nordmark@Sun.COM 		IPPH_REFRELE(connp->conn_policy, ns);
67211042SErik.Nordmark@Sun.COM 		connp->conn_policy = NULL;
67311042SErik.Nordmark@Sun.COM 	}
6743448Sdh155122 
67511042SErik.Nordmark@Sun.COM 	if (connp->conn_ipsec_opt_mp != NULL) {
67611042SErik.Nordmark@Sun.COM 		freemsg(connp->conn_ipsec_opt_mp);
67711042SErik.Nordmark@Sun.COM 		connp->conn_ipsec_opt_mp = NULL;
67811042SErik.Nordmark@Sun.COM 	}
67911042SErik.Nordmark@Sun.COM 
68011042SErik.Nordmark@Sun.COM 	if (connp->conn_flags & IPCL_TCPCONN) {
68111042SErik.Nordmark@Sun.COM 		tcp_t *tcp = connp->conn_tcp;
682741Smasputra 
6830Sstevel@tonic-gate 		tcp_free(tcp);
6840Sstevel@tonic-gate 		mp = tcp->tcp_timercache;
68511042SErik.Nordmark@Sun.COM 
68611042SErik.Nordmark@Sun.COM 		tcp->tcp_tcps = NULL;
6870Sstevel@tonic-gate 
6880Sstevel@tonic-gate 		if (tcp->tcp_sack_info != NULL) {
6890Sstevel@tonic-gate 			bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t));
6900Sstevel@tonic-gate 			kmem_cache_free(tcp_sack_info_cache,
6910Sstevel@tonic-gate 			    tcp->tcp_sack_info);
6920Sstevel@tonic-gate 		}
6930Sstevel@tonic-gate 
6948014SKacheong.Poon@Sun.COM 		/*
6958014SKacheong.Poon@Sun.COM 		 * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate
6968014SKacheong.Poon@Sun.COM 		 * the mblk.
6978014SKacheong.Poon@Sun.COM 		 */
6988014SKacheong.Poon@Sun.COM 		if (tcp->tcp_rsrv_mp != NULL) {
6998014SKacheong.Poon@Sun.COM 			freeb(tcp->tcp_rsrv_mp);
7008014SKacheong.Poon@Sun.COM 			tcp->tcp_rsrv_mp = NULL;
7018014SKacheong.Poon@Sun.COM 			mutex_destroy(&tcp->tcp_rsrv_mp_lock);
7028014SKacheong.Poon@Sun.COM 		}
7038014SKacheong.Poon@Sun.COM 
70411042SErik.Nordmark@Sun.COM 		ipcl_conn_cleanup(connp);
70511042SErik.Nordmark@Sun.COM 		connp->conn_flags = IPCL_TCPCONN;
7063448Sdh155122 		if (ns != NULL) {
7073448Sdh155122 			ASSERT(tcp->tcp_tcps == NULL);
7083448Sdh155122 			connp->conn_netstack = NULL;
70911042SErik.Nordmark@Sun.COM 			connp->conn_ixa->ixa_ipst = NULL;
7103448Sdh155122 			netstack_rele(ns);
7113448Sdh155122 		}
7125240Snordmark 
7135240Snordmark 		bzero(tcp, sizeof (tcp_t));
7145240Snordmark 
7155240Snordmark 		tcp->tcp_timercache = mp;
7165240Snordmark 		tcp->tcp_connp = connp;
7175240Snordmark 		kmem_cache_free(tcp_conn_cache, connp);
7185240Snordmark 		return;
7195240Snordmark 	}
7205240Snordmark 
7215240Snordmark 	if (connp->conn_flags & IPCL_SCTPCONN) {
7223448Sdh155122 		ASSERT(ns != NULL);
7230Sstevel@tonic-gate 		sctp_free(connp);
7245240Snordmark 		return;
7255240Snordmark 	}
7265240Snordmark 
72711042SErik.Nordmark@Sun.COM 	ipcl_conn_cleanup(connp);
7285240Snordmark 	if (ns != NULL) {
7295240Snordmark 		connp->conn_netstack = NULL;
73011042SErik.Nordmark@Sun.COM 		connp->conn_ixa->ixa_ipst = NULL;
7315240Snordmark 		netstack_rele(ns);
7325240Snordmark 	}
7338348SEric.Yu@Sun.COM 
7345240Snordmark 	/* leave conn_priv aka conn_udp, conn_icmp, etc in place. */
7355240Snordmark 	if (connp->conn_flags & IPCL_UDPCONN) {
7365240Snordmark 		connp->conn_flags = IPCL_UDPCONN;
7375240Snordmark 		kmem_cache_free(udp_conn_cache, connp);
7385240Snordmark 	} else if (connp->conn_flags & IPCL_RAWIPCONN) {
7395240Snordmark 		connp->conn_flags = IPCL_RAWIPCONN;
74011042SErik.Nordmark@Sun.COM 		connp->conn_proto = IPPROTO_ICMP;
74111042SErik.Nordmark@Sun.COM 		connp->conn_ixa->ixa_protocol = connp->conn_proto;
7425240Snordmark 		kmem_cache_free(rawip_conn_cache, connp);
7435240Snordmark 	} else if (connp->conn_flags & IPCL_RTSCONN) {
7445240Snordmark 		connp->conn_flags = IPCL_RTSCONN;
7455240Snordmark 		kmem_cache_free(rts_conn_cache, connp);
7460Sstevel@tonic-gate 	} else {
7475240Snordmark 		connp->conn_flags = IPCL_IPCCONN;
7485240Snordmark 		ASSERT(connp->conn_flags & IPCL_IPCCONN);
7495240Snordmark 		ASSERT(connp->conn_priv == NULL);
7505240Snordmark 		kmem_cache_free(ip_conn_cache, connp);
7510Sstevel@tonic-gate 	}
7520Sstevel@tonic-gate }
7530Sstevel@tonic-gate 
7540Sstevel@tonic-gate /*
7550Sstevel@tonic-gate  * Running in cluster mode - deregister listener information
7560Sstevel@tonic-gate  */
7570Sstevel@tonic-gate static void
7580Sstevel@tonic-gate ipcl_conn_unlisten(conn_t *connp)
7590Sstevel@tonic-gate {
7600Sstevel@tonic-gate 	ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0);
7610Sstevel@tonic-gate 	ASSERT(connp->conn_lport != 0);
7620Sstevel@tonic-gate 
7630Sstevel@tonic-gate 	if (cl_inet_unlisten != NULL) {
7640Sstevel@tonic-gate 		sa_family_t	addr_family;
7650Sstevel@tonic-gate 		uint8_t		*laddrp;
7660Sstevel@tonic-gate 
76711042SErik.Nordmark@Sun.COM 		if (connp->conn_ipversion == IPV6_VERSION) {
7680Sstevel@tonic-gate 			addr_family = AF_INET6;
76911042SErik.Nordmark@Sun.COM 			laddrp = (uint8_t *)&connp->conn_bound_addr_v6;
7700Sstevel@tonic-gate 		} else {
7710Sstevel@tonic-gate 			addr_family = AF_INET;
77211042SErik.Nordmark@Sun.COM 			laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
7730Sstevel@tonic-gate 		}
7748392SHuafeng.Lv@Sun.COM 		(*cl_inet_unlisten)(connp->conn_netstack->netstack_stackid,
7758392SHuafeng.Lv@Sun.COM 		    IPPROTO_TCP, addr_family, laddrp, connp->conn_lport, NULL);
7760Sstevel@tonic-gate 	}
7770Sstevel@tonic-gate 	connp->conn_flags &= ~IPCL_CL_LISTENER;
7780Sstevel@tonic-gate }
7790Sstevel@tonic-gate 
/*
 * Unlink connp from the fanout bucket recorded in conn_fanout, if any.
 * conn_lock must not be held by the caller; the bucket's connf_lock is
 * taken here.  The reference taken at insertion is dropped with
 * CONN_DEC_REF, and a cluster listener is deregistered on the way out.
 *
 * We set the IPCL_REMOVED flag (instead of clearing the flag indicating
 * which table the conn belonged to). So for debugging we can see which hash
 * table this connection was in.
 */
#define	IPCL_HASH_REMOVE(connp)	{					\
	connf_t	*rm_connfp = (connp)->conn_fanout;			\
	ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));			\
	if (rm_connfp != NULL) {					\
		mutex_enter(&rm_connfp->connf_lock);			\
		if ((connp)->conn_next != NULL) {			\
			(connp)->conn_next->conn_prev =			\
			    (connp)->conn_prev;				\
		}							\
		if ((connp)->conn_prev != NULL) {			\
			(connp)->conn_prev->conn_next =			\
			    (connp)->conn_next;				\
		} else {						\
			rm_connfp->connf_head = (connp)->conn_next;	\
		}							\
		(connp)->conn_fanout = NULL;				\
		(connp)->conn_next = NULL;				\
		(connp)->conn_prev = NULL;				\
		(connp)->conn_flags |= IPCL_REMOVED;			\
		if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0) {	\
			ipcl_conn_unlisten((connp));			\
		}							\
		CONN_DEC_REF((connp));					\
		mutex_exit(&rm_connfp->connf_lock);			\
	}								\
}
8080Sstevel@tonic-gate 
8090Sstevel@tonic-gate void
8100Sstevel@tonic-gate ipcl_hash_remove(conn_t *connp)
8110Sstevel@tonic-gate {
81211042SErik.Nordmark@Sun.COM 	uint8_t		protocol = connp->conn_proto;
81311042SErik.Nordmark@Sun.COM 
8140Sstevel@tonic-gate 	IPCL_HASH_REMOVE(connp);
81511042SErik.Nordmark@Sun.COM 	if (protocol == IPPROTO_RSVP)
81611042SErik.Nordmark@Sun.COM 		ill_set_inputfn_all(connp->conn_netstack->netstack_ip);
8170Sstevel@tonic-gate }
8180Sstevel@tonic-gate 
8190Sstevel@tonic-gate /*
8200Sstevel@tonic-gate  * The whole purpose of this function is allow removal of
8210Sstevel@tonic-gate  * a conn_t from the connected hash for timewait reclaim.
8220Sstevel@tonic-gate  * This is essentially a TW reclaim fastpath where timewait
8230Sstevel@tonic-gate  * collector checks under fanout lock (so no one else can
8240Sstevel@tonic-gate  * get access to the conn_t) that refcnt is 2 i.e. one for
8250Sstevel@tonic-gate  * TCP and one for the classifier hash list. If ref count
8260Sstevel@tonic-gate  * is indeed 2, we can just remove the conn under lock and
8270Sstevel@tonic-gate  * avoid cleaning up the conn under squeue. This gives us
8280Sstevel@tonic-gate  * improved performance.
8290Sstevel@tonic-gate  */
8300Sstevel@tonic-gate void
8310Sstevel@tonic-gate ipcl_hash_remove_locked(conn_t *connp, connf_t	*connfp)
8320Sstevel@tonic-gate {
8330Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connfp->connf_lock));
8340Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connp->conn_lock));
8350Sstevel@tonic-gate 	ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0);
8360Sstevel@tonic-gate 
8370Sstevel@tonic-gate 	if ((connp)->conn_next != NULL) {
8384691Skcpoon 		(connp)->conn_next->conn_prev = (connp)->conn_prev;
8390Sstevel@tonic-gate 	}
8400Sstevel@tonic-gate 	if ((connp)->conn_prev != NULL) {
8414691Skcpoon 		(connp)->conn_prev->conn_next = (connp)->conn_next;
8420Sstevel@tonic-gate 	} else {
8430Sstevel@tonic-gate 		connfp->connf_head = (connp)->conn_next;
8440Sstevel@tonic-gate 	}
8450Sstevel@tonic-gate 	(connp)->conn_fanout = NULL;
8460Sstevel@tonic-gate 	(connp)->conn_next = NULL;
8470Sstevel@tonic-gate 	(connp)->conn_prev = NULL;
8480Sstevel@tonic-gate 	(connp)->conn_flags |= IPCL_REMOVED;
8490Sstevel@tonic-gate 	ASSERT((connp)->conn_ref == 2);
8500Sstevel@tonic-gate 	(connp)->conn_ref--;
8510Sstevel@tonic-gate }
8520Sstevel@tonic-gate 
/*
 * Link connp at the head of bucket connfp and mark it IPCL_CONNECTED
 * (clearing IPCL_REMOVED).  A reference is taken for the hash list.
 *
 * This macro does not take connf_lock itself, and it expects connp to be
 * out of every fanout already (conn_fanout, conn_next and conn_prev are
 * asserted to be NULL).
 */
#define	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) {		\
	ASSERT((connp)->conn_fanout == NULL);				\
	ASSERT((connp)->conn_next == NULL);				\
	ASSERT((connp)->conn_prev == NULL);				\
	if ((connfp)->connf_head != NULL) {				\
		(connfp)->connf_head->conn_prev = (connp);		\
		(connp)->conn_next = (connfp)->connf_head;		\
	}								\
	(connp)->conn_fanout = (connfp);				\
	(connfp)->connf_head = (connp);					\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_CONNECTED;						\
	CONN_INC_REF(connp);						\
}
8670Sstevel@tonic-gate 
/*
 * Move connp to the head of bucket connfp as a connected entry: remove it
 * from whatever fanout it is currently in, then insert it under connfp's
 * connf_lock.
 */
#define	IPCL_HASH_INSERT_CONNECTED(connfp, connp) {			\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	IPCL_HASH_INSERT_CONNECTED_LOCKED((connfp), (connp));		\
	mutex_exit(&(connfp)->connf_lock);				\
}
8740Sstevel@tonic-gate 
/*
 * Move connp into bucket connfp as a bound entry (IPCL_BOUND is set and
 * IPCL_REMOVED cleared).  connp is first removed from its current fanout.
 *
 * The bucket is walked from the head up to the first entry whose
 * conn_laddr_v6 satisfies _IPCL_V4_MATCH_ANY, or to the end of the list;
 * connp is linked in immediately before that entry (at the tail if there
 * is none).  A reference is taken for the hash list.
 */
#define	IPCL_HASH_INSERT_BOUND(connfp, connp) {				\
	conn_t *ins_after = NULL, *ins_before;				\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	for (ins_before = (connfp)->connf_head;				\
	    ins_before != NULL &&					\
	    !_IPCL_V4_MATCH_ANY(ins_before->conn_laddr_v6);		\
	    ins_before = ins_before->conn_next) {			\
		ins_after = ins_before;					\
	}								\
	if (ins_after != NULL) {					\
		ins_after->conn_next = (connp);				\
		(connp)->conn_prev = ins_after;				\
	} else {							\
		(connfp)->connf_head = (connp);				\
	}								\
	if (ins_before != NULL) {					\
		(connp)->conn_next = ins_before;			\
		ins_before->conn_prev = (connp);			\
	}								\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF(connp);						\
	mutex_exit(&(connfp)->connf_lock);				\
}
9010Sstevel@tonic-gate 
/*
 * Move connp into bucket connfp as a wildcard entry (IPCL_BOUND is set and
 * IPCL_REMOVED cleared).  connp is first removed from its current fanout.
 *
 * Normally connp is appended at the tail of the bucket.  The exception is
 * a connp whose conn_laddr_v6 is v4-mapped: if the bucket holds an entry in
 * the same zone whose conn_laddr_v6 is the unspecified address, connp is
 * linked in just before the first such entry.  A reference is taken for
 * the hash list.
 *
 * Every use of the connp argument is parenthesized so that the macro is
 * safe to invoke with an expression rather than a plain identifier.
 */
#define	IPCL_HASH_INSERT_WILDCARD(connfp, connp) {			\
	conn_t **list, *prev, *next;					\
	boolean_t isv4mapped =						\
	    IN6_IS_ADDR_V4MAPPED(&(connp)->conn_laddr_v6);		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	list = &(connfp)->connf_head;					\
	prev = NULL;							\
	while ((next = *list) != NULL) {				\
		if (isv4mapped &&					\
		    IN6_IS_ADDR_UNSPECIFIED(&next->conn_laddr_v6) &&	\
		    (connp)->conn_zoneid == next->conn_zoneid) {	\
			(connp)->conn_next = next;			\
			/* prev already equals next->conn_prev here */	\
			if (prev != NULL)				\
				prev = next->conn_prev;			\
			next->conn_prev = (connp);			\
			break;						\
		}							\
		list = &next->conn_next;				\
		prev = next;						\
	}								\
	(connp)->conn_prev = prev;					\
	*list = (connp);						\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF((connp));						\
	mutex_exit(&(connfp)->connf_lock);				\
}
9310Sstevel@tonic-gate 
9320Sstevel@tonic-gate void
9330Sstevel@tonic-gate ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp)
9340Sstevel@tonic-gate {
9350Sstevel@tonic-gate 	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
9360Sstevel@tonic-gate }
9370Sstevel@tonic-gate 
9380Sstevel@tonic-gate /*
93910616SSebastien.Roy@Sun.COM  * Because the classifier is used to classify inbound packets, the destination
94010616SSebastien.Roy@Sun.COM  * address is meant to be our local tunnel address (tunnel source), and the
94110616SSebastien.Roy@Sun.COM  * source the remote tunnel address (tunnel destination).
94211042SErik.Nordmark@Sun.COM  *
94311042SErik.Nordmark@Sun.COM  * Note that conn_proto can't be used for fanout since the upper protocol
94411042SErik.Nordmark@Sun.COM  * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel.
94510616SSebastien.Roy@Sun.COM  */
94610616SSebastien.Roy@Sun.COM conn_t *
94710616SSebastien.Roy@Sun.COM ipcl_iptun_classify_v4(ipaddr_t *src, ipaddr_t *dst, ip_stack_t *ipst)
94810616SSebastien.Roy@Sun.COM {
94910616SSebastien.Roy@Sun.COM 	connf_t	*connfp;
95010616SSebastien.Roy@Sun.COM 	conn_t	*connp;
95110616SSebastien.Roy@Sun.COM 
95210616SSebastien.Roy@Sun.COM 	/* first look for IPv4 tunnel links */
95310616SSebastien.Roy@Sun.COM 	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, *src)];
95410616SSebastien.Roy@Sun.COM 	mutex_enter(&connfp->connf_lock);
95510616SSebastien.Roy@Sun.COM 	for (connp = connfp->connf_head; connp != NULL;
95610616SSebastien.Roy@Sun.COM 	    connp = connp->conn_next) {
95710616SSebastien.Roy@Sun.COM 		if (IPCL_IPTUN_MATCH(connp, *dst, *src))
95810616SSebastien.Roy@Sun.COM 			break;
95910616SSebastien.Roy@Sun.COM 	}
96010616SSebastien.Roy@Sun.COM 	if (connp != NULL)
96110616SSebastien.Roy@Sun.COM 		goto done;
96210616SSebastien.Roy@Sun.COM 
96310616SSebastien.Roy@Sun.COM 	mutex_exit(&connfp->connf_lock);
96410616SSebastien.Roy@Sun.COM 
96510616SSebastien.Roy@Sun.COM 	/* We didn't find an IPv4 tunnel, try a 6to4 tunnel */
96610616SSebastien.Roy@Sun.COM 	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst,
96710616SSebastien.Roy@Sun.COM 	    INADDR_ANY)];
96810616SSebastien.Roy@Sun.COM 	mutex_enter(&connfp->connf_lock);
96910616SSebastien.Roy@Sun.COM 	for (connp = connfp->connf_head; connp != NULL;
97010616SSebastien.Roy@Sun.COM 	    connp = connp->conn_next) {
97110616SSebastien.Roy@Sun.COM 		if (IPCL_IPTUN_MATCH(connp, *dst, INADDR_ANY))
97210616SSebastien.Roy@Sun.COM 			break;
97310616SSebastien.Roy@Sun.COM 	}
97410616SSebastien.Roy@Sun.COM done:
97510616SSebastien.Roy@Sun.COM 	if (connp != NULL)
97610616SSebastien.Roy@Sun.COM 		CONN_INC_REF(connp);
97710616SSebastien.Roy@Sun.COM 	mutex_exit(&connfp->connf_lock);
97810616SSebastien.Roy@Sun.COM 	return (connp);
97910616SSebastien.Roy@Sun.COM }
98010616SSebastien.Roy@Sun.COM 
98110616SSebastien.Roy@Sun.COM conn_t *
98210616SSebastien.Roy@Sun.COM ipcl_iptun_classify_v6(in6_addr_t *src, in6_addr_t *dst, ip_stack_t *ipst)
98310616SSebastien.Roy@Sun.COM {
98410616SSebastien.Roy@Sun.COM 	connf_t	*connfp;
98510616SSebastien.Roy@Sun.COM 	conn_t	*connp;
98610616SSebastien.Roy@Sun.COM 
98710616SSebastien.Roy@Sun.COM 	/* Look for an IPv6 tunnel link */
98810616SSebastien.Roy@Sun.COM 	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(dst, src)];
98910616SSebastien.Roy@Sun.COM 	mutex_enter(&connfp->connf_lock);
99010616SSebastien.Roy@Sun.COM 	for (connp = connfp->connf_head; connp != NULL;
99110616SSebastien.Roy@Sun.COM 	    connp = connp->conn_next) {
99210616SSebastien.Roy@Sun.COM 		if (IPCL_IPTUN_MATCH_V6(connp, dst, src)) {
99310616SSebastien.Roy@Sun.COM 			CONN_INC_REF(connp);
99410616SSebastien.Roy@Sun.COM 			break;
99510616SSebastien.Roy@Sun.COM 		}
99610616SSebastien.Roy@Sun.COM 	}
99710616SSebastien.Roy@Sun.COM 	mutex_exit(&connfp->connf_lock);
99810616SSebastien.Roy@Sun.COM 	return (connp);
99910616SSebastien.Roy@Sun.COM }
100010616SSebastien.Roy@Sun.COM 
100110616SSebastien.Roy@Sun.COM /*
10020Sstevel@tonic-gate  * This function is used only for inserting SCTP raw socket now.
10030Sstevel@tonic-gate  * This may change later.
10040Sstevel@tonic-gate  *
10050Sstevel@tonic-gate  * Note that only one raw socket can be bound to a port.  The param
10060Sstevel@tonic-gate  * lport is in network byte order.
10070Sstevel@tonic-gate  */
10080Sstevel@tonic-gate static int
10090Sstevel@tonic-gate ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport)
10100Sstevel@tonic-gate {
10110Sstevel@tonic-gate 	connf_t	*connfp;
10120Sstevel@tonic-gate 	conn_t	*oconnp;
10133448Sdh155122 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
10140Sstevel@tonic-gate 
10153448Sdh155122 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
10160Sstevel@tonic-gate 
10170Sstevel@tonic-gate 	/* Check for existing raw socket already bound to the port. */
10180Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
10190Sstevel@tonic-gate 	for (oconnp = connfp->connf_head; oconnp != NULL;
1020409Skcpoon 	    oconnp = oconnp->conn_next) {
10210Sstevel@tonic-gate 		if (oconnp->conn_lport == lport &&
10220Sstevel@tonic-gate 		    oconnp->conn_zoneid == connp->conn_zoneid &&
102311042SErik.Nordmark@Sun.COM 		    oconnp->conn_family == connp->conn_family &&
102411042SErik.Nordmark@Sun.COM 		    ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
102511042SErik.Nordmark@Sun.COM 		    IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_laddr_v6) ||
102611042SErik.Nordmark@Sun.COM 		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6) ||
102711042SErik.Nordmark@Sun.COM 		    IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_laddr_v6)) ||
102811042SErik.Nordmark@Sun.COM 		    IN6_ARE_ADDR_EQUAL(&oconnp->conn_laddr_v6,
102911042SErik.Nordmark@Sun.COM 		    &connp->conn_laddr_v6))) {
10300Sstevel@tonic-gate 			break;
10310Sstevel@tonic-gate 		}
10320Sstevel@tonic-gate 	}
10330Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
10340Sstevel@tonic-gate 	if (oconnp != NULL)
10350Sstevel@tonic-gate 		return (EADDRNOTAVAIL);
10360Sstevel@tonic-gate 
103711042SErik.Nordmark@Sun.COM 	if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) ||
103811042SErik.Nordmark@Sun.COM 	    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
103911042SErik.Nordmark@Sun.COM 		if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
104011042SErik.Nordmark@Sun.COM 		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6)) {
10410Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
10420Sstevel@tonic-gate 		} else {
10430Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
10440Sstevel@tonic-gate 		}
10450Sstevel@tonic-gate 	} else {
10460Sstevel@tonic-gate 		IPCL_HASH_INSERT_CONNECTED(connfp, connp);
10470Sstevel@tonic-gate 	}
10480Sstevel@tonic-gate 	return (0);
10490Sstevel@tonic-gate }
10500Sstevel@tonic-gate 
105110616SSebastien.Roy@Sun.COM static int
105211042SErik.Nordmark@Sun.COM ipcl_iptun_hash_insert(conn_t *connp, ip_stack_t *ipst)
105310616SSebastien.Roy@Sun.COM {
105410616SSebastien.Roy@Sun.COM 	connf_t	*connfp;
105510616SSebastien.Roy@Sun.COM 	conn_t	*tconnp;
105611042SErik.Nordmark@Sun.COM 	ipaddr_t laddr = connp->conn_laddr_v4;
105711042SErik.Nordmark@Sun.COM 	ipaddr_t faddr = connp->conn_faddr_v4;
105810616SSebastien.Roy@Sun.COM 
105911042SErik.Nordmark@Sun.COM 	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(laddr, faddr)];
106010616SSebastien.Roy@Sun.COM 	mutex_enter(&connfp->connf_lock);
106110616SSebastien.Roy@Sun.COM 	for (tconnp = connfp->connf_head; tconnp != NULL;
106210616SSebastien.Roy@Sun.COM 	    tconnp = tconnp->conn_next) {
106311042SErik.Nordmark@Sun.COM 		if (IPCL_IPTUN_MATCH(tconnp, laddr, faddr)) {
106410616SSebastien.Roy@Sun.COM 			/* A tunnel is already bound to these addresses. */
106510616SSebastien.Roy@Sun.COM 			mutex_exit(&connfp->connf_lock);
106610616SSebastien.Roy@Sun.COM 			return (EADDRINUSE);
106710616SSebastien.Roy@Sun.COM 		}
106810616SSebastien.Roy@Sun.COM 	}
106910616SSebastien.Roy@Sun.COM 	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
107010616SSebastien.Roy@Sun.COM 	mutex_exit(&connfp->connf_lock);
107110616SSebastien.Roy@Sun.COM 	return (0);
107210616SSebastien.Roy@Sun.COM }
107310616SSebastien.Roy@Sun.COM 
107410616SSebastien.Roy@Sun.COM static int
107511042SErik.Nordmark@Sun.COM ipcl_iptun_hash_insert_v6(conn_t *connp, ip_stack_t *ipst)
107610616SSebastien.Roy@Sun.COM {
107710616SSebastien.Roy@Sun.COM 	connf_t	*connfp;
107810616SSebastien.Roy@Sun.COM 	conn_t	*tconnp;
107911042SErik.Nordmark@Sun.COM 	in6_addr_t *laddr = &connp->conn_laddr_v6;
108011042SErik.Nordmark@Sun.COM 	in6_addr_t *faddr = &connp->conn_faddr_v6;
108110616SSebastien.Roy@Sun.COM 
108211042SErik.Nordmark@Sun.COM 	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(laddr, faddr)];
108310616SSebastien.Roy@Sun.COM 	mutex_enter(&connfp->connf_lock);
108410616SSebastien.Roy@Sun.COM 	for (tconnp = connfp->connf_head; tconnp != NULL;
108510616SSebastien.Roy@Sun.COM 	    tconnp = tconnp->conn_next) {
108611042SErik.Nordmark@Sun.COM 		if (IPCL_IPTUN_MATCH_V6(tconnp, laddr, faddr)) {
108710616SSebastien.Roy@Sun.COM 			/* A tunnel is already bound to these addresses. */
108810616SSebastien.Roy@Sun.COM 			mutex_exit(&connfp->connf_lock);
108910616SSebastien.Roy@Sun.COM 			return (EADDRINUSE);
109010616SSebastien.Roy@Sun.COM 		}
109110616SSebastien.Roy@Sun.COM 	}
109210616SSebastien.Roy@Sun.COM 	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
109310616SSebastien.Roy@Sun.COM 	mutex_exit(&connfp->connf_lock);
109410616SSebastien.Roy@Sun.COM 	return (0);
109510616SSebastien.Roy@Sun.COM }
109610616SSebastien.Roy@Sun.COM 
10970Sstevel@tonic-gate /*
10981676Sjpk  * Check for a MAC exemption conflict on a labeled system.  Note that for
10991676Sjpk  * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the
11001676Sjpk  * transport layer.  This check is for binding all other protocols.
11011676Sjpk  *
11021676Sjpk  * Returns true if there's a conflict.
11031676Sjpk  */
11041676Sjpk static boolean_t
11053448Sdh155122 check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst)
11061676Sjpk {
11071676Sjpk 	connf_t	*connfp;
11081676Sjpk 	conn_t *tconn;
11091676Sjpk 
111011042SErik.Nordmark@Sun.COM 	connfp = &ipst->ips_ipcl_proto_fanout_v4[connp->conn_proto];
11111676Sjpk 	mutex_enter(&connfp->connf_lock);
11121676Sjpk 	for (tconn = connfp->connf_head; tconn != NULL;
11131676Sjpk 	    tconn = tconn->conn_next) {
11141676Sjpk 		/* We don't allow v4 fallback for v6 raw socket */
111511042SErik.Nordmark@Sun.COM 		if (connp->conn_family != tconn->conn_family)
11161676Sjpk 			continue;
11171676Sjpk 		/* If neither is exempt, then there's no conflict */
111810934Ssommerfeld@sun.com 		if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
111910934Ssommerfeld@sun.com 		    (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
11201676Sjpk 			continue;
11219710SKen.Powell@Sun.COM 		/* We are only concerned about sockets for a different zone */
11229710SKen.Powell@Sun.COM 		if (connp->conn_zoneid == tconn->conn_zoneid)
11239710SKen.Powell@Sun.COM 			continue;
11241676Sjpk 		/* If both are bound to different specific addrs, ok */
112511042SErik.Nordmark@Sun.COM 		if (connp->conn_laddr_v4 != INADDR_ANY &&
112611042SErik.Nordmark@Sun.COM 		    tconn->conn_laddr_v4 != INADDR_ANY &&
112711042SErik.Nordmark@Sun.COM 		    connp->conn_laddr_v4 != tconn->conn_laddr_v4)
11281676Sjpk 			continue;
11291676Sjpk 		/* These two conflict; fail */
11301676Sjpk 		break;
11311676Sjpk 	}
11321676Sjpk 	mutex_exit(&connfp->connf_lock);
11331676Sjpk 	return (tconn != NULL);
11341676Sjpk }
11351676Sjpk 
11361676Sjpk static boolean_t
11373448Sdh155122 check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst)
11381676Sjpk {
11391676Sjpk 	connf_t	*connfp;
11401676Sjpk 	conn_t *tconn;
11411676Sjpk 
114211042SErik.Nordmark@Sun.COM 	connfp = &ipst->ips_ipcl_proto_fanout_v6[connp->conn_proto];
11431676Sjpk 	mutex_enter(&connfp->connf_lock);
11441676Sjpk 	for (tconn = connfp->connf_head; tconn != NULL;
11451676Sjpk 	    tconn = tconn->conn_next) {
11461676Sjpk 		/* We don't allow v4 fallback for v6 raw socket */
114711042SErik.Nordmark@Sun.COM 		if (connp->conn_family != tconn->conn_family)
11481676Sjpk 			continue;
11491676Sjpk 		/* If neither is exempt, then there's no conflict */
115010934Ssommerfeld@sun.com 		if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
115110934Ssommerfeld@sun.com 		    (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
11521676Sjpk 			continue;
11539710SKen.Powell@Sun.COM 		/* We are only concerned about sockets for a different zone */
11549710SKen.Powell@Sun.COM 		if (connp->conn_zoneid == tconn->conn_zoneid)
11559710SKen.Powell@Sun.COM 			continue;
11561676Sjpk 		/* If both are bound to different addrs, ok */
115711042SErik.Nordmark@Sun.COM 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) &&
115811042SErik.Nordmark@Sun.COM 		    !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_laddr_v6) &&
115911042SErik.Nordmark@Sun.COM 		    !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
116011042SErik.Nordmark@Sun.COM 		    &tconn->conn_laddr_v6))
11611676Sjpk 			continue;
11621676Sjpk 		/* These two conflict; fail */
11631676Sjpk 		break;
11641676Sjpk 	}
11651676Sjpk 	mutex_exit(&connfp->connf_lock);
11661676Sjpk 	return (tconn != NULL);
11671676Sjpk }
11681676Sjpk 
11691676Sjpk /*
11700Sstevel@tonic-gate  * (v4, v6) bind hash insertion routines
117111042SErik.Nordmark@Sun.COM  * The caller has already setup the conn (conn_proto, conn_laddr_v6, conn_lport)
11720Sstevel@tonic-gate  */
117311042SErik.Nordmark@Sun.COM 
11740Sstevel@tonic-gate int
117511042SErik.Nordmark@Sun.COM ipcl_bind_insert(conn_t *connp)
117611042SErik.Nordmark@Sun.COM {
117711042SErik.Nordmark@Sun.COM 	if (connp->conn_ipversion == IPV6_VERSION)
117811042SErik.Nordmark@Sun.COM 		return (ipcl_bind_insert_v6(connp));
117911042SErik.Nordmark@Sun.COM 	else
118011042SErik.Nordmark@Sun.COM 		return (ipcl_bind_insert_v4(connp));
118111042SErik.Nordmark@Sun.COM }
118211042SErik.Nordmark@Sun.COM 
118311042SErik.Nordmark@Sun.COM int
118411042SErik.Nordmark@Sun.COM ipcl_bind_insert_v4(conn_t *connp)
11850Sstevel@tonic-gate {
11860Sstevel@tonic-gate 	connf_t	*connfp;
11870Sstevel@tonic-gate 	int	ret = 0;
11883448Sdh155122 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
118911042SErik.Nordmark@Sun.COM 	uint16_t	lport = connp->conn_lport;
119011042SErik.Nordmark@Sun.COM 	uint8_t		protocol = connp->conn_proto;
11910Sstevel@tonic-gate 
119210616SSebastien.Roy@Sun.COM 	if (IPCL_IS_IPTUN(connp))
119311042SErik.Nordmark@Sun.COM 		return (ipcl_iptun_hash_insert(connp, ipst));
119410616SSebastien.Roy@Sun.COM 
11950Sstevel@tonic-gate 	switch (protocol) {
11961676Sjpk 	default:
11973448Sdh155122 		if (is_system_labeled() &&
11983448Sdh155122 		    check_exempt_conflict_v4(connp, ipst))
11991676Sjpk 			return (EADDRINUSE);
12001676Sjpk 		/* FALLTHROUGH */
12010Sstevel@tonic-gate 	case IPPROTO_UDP:
12020Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
12033448Sdh155122 			connfp = &ipst->ips_ipcl_udp_fanout[
12043448Sdh155122 			    IPCL_UDP_HASH(lport, ipst)];
12050Sstevel@tonic-gate 		} else {
120611042SErik.Nordmark@Sun.COM 			connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
12070Sstevel@tonic-gate 		}
12080Sstevel@tonic-gate 
120911042SErik.Nordmark@Sun.COM 		if (connp->conn_faddr_v4 != INADDR_ANY) {
12100Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
121111042SErik.Nordmark@Sun.COM 		} else if (connp->conn_laddr_v4 != INADDR_ANY) {
12120Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
12130Sstevel@tonic-gate 		} else {
12140Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
12150Sstevel@tonic-gate 		}
121611042SErik.Nordmark@Sun.COM 		if (protocol == IPPROTO_RSVP)
121711042SErik.Nordmark@Sun.COM 			ill_set_inputfn_all(ipst);
12180Sstevel@tonic-gate 		break;
12190Sstevel@tonic-gate 
12200Sstevel@tonic-gate 	case IPPROTO_TCP:
12210Sstevel@tonic-gate 		/* Insert it in the Bind Hash */
12221676Sjpk 		ASSERT(connp->conn_zoneid != ALL_ZONES);
12233448Sdh155122 		connfp = &ipst->ips_ipcl_bind_fanout[
12243448Sdh155122 		    IPCL_BIND_HASH(lport, ipst)];
122511042SErik.Nordmark@Sun.COM 		if (connp->conn_laddr_v4 != INADDR_ANY) {
12260Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
12270Sstevel@tonic-gate 		} else {
12280Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
12290Sstevel@tonic-gate 		}
12300Sstevel@tonic-gate 		if (cl_inet_listen != NULL) {
123111042SErik.Nordmark@Sun.COM 			ASSERT(connp->conn_ipversion == IPV4_VERSION);
12320Sstevel@tonic-gate 			connp->conn_flags |= IPCL_CL_LISTENER;
12338392SHuafeng.Lv@Sun.COM 			(*cl_inet_listen)(
12348392SHuafeng.Lv@Sun.COM 			    connp->conn_netstack->netstack_stackid,
12358392SHuafeng.Lv@Sun.COM 			    IPPROTO_TCP, AF_INET,
123611042SErik.Nordmark@Sun.COM 			    (uint8_t *)&connp->conn_bound_addr_v4, lport, NULL);
12370Sstevel@tonic-gate 		}
12380Sstevel@tonic-gate 		break;
12390Sstevel@tonic-gate 
12400Sstevel@tonic-gate 	case IPPROTO_SCTP:
12410Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
12420Sstevel@tonic-gate 		break;
12430Sstevel@tonic-gate 	}
12440Sstevel@tonic-gate 
12450Sstevel@tonic-gate 	return (ret);
12460Sstevel@tonic-gate }
12470Sstevel@tonic-gate 
12480Sstevel@tonic-gate int
124911042SErik.Nordmark@Sun.COM ipcl_bind_insert_v6(conn_t *connp)
12500Sstevel@tonic-gate {
125110616SSebastien.Roy@Sun.COM 	connf_t		*connfp;
125210616SSebastien.Roy@Sun.COM 	int		ret = 0;
12533448Sdh155122 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
125411042SErik.Nordmark@Sun.COM 	uint16_t	lport = connp->conn_lport;
125511042SErik.Nordmark@Sun.COM 	uint8_t		protocol = connp->conn_proto;
12560Sstevel@tonic-gate 
125710616SSebastien.Roy@Sun.COM 	if (IPCL_IS_IPTUN(connp)) {
125811042SErik.Nordmark@Sun.COM 		return (ipcl_iptun_hash_insert_v6(connp, ipst));
125910616SSebastien.Roy@Sun.COM 	}
126010616SSebastien.Roy@Sun.COM 
12610Sstevel@tonic-gate 	switch (protocol) {
12621676Sjpk 	default:
12633448Sdh155122 		if (is_system_labeled() &&
12643448Sdh155122 		    check_exempt_conflict_v6(connp, ipst))
12651676Sjpk 			return (EADDRINUSE);
12661676Sjpk 		/* FALLTHROUGH */
12670Sstevel@tonic-gate 	case IPPROTO_UDP:
12680Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
12693448Sdh155122 			connfp = &ipst->ips_ipcl_udp_fanout[
12703448Sdh155122 			    IPCL_UDP_HASH(lport, ipst)];
12710Sstevel@tonic-gate 		} else {
12723448Sdh155122 			connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
12730Sstevel@tonic-gate 		}
12740Sstevel@tonic-gate 
127511042SErik.Nordmark@Sun.COM 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
12760Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
127711042SErik.Nordmark@Sun.COM 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
12780Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
12790Sstevel@tonic-gate 		} else {
12800Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
12810Sstevel@tonic-gate 		}
12820Sstevel@tonic-gate 		break;
12830Sstevel@tonic-gate 
12840Sstevel@tonic-gate 	case IPPROTO_TCP:
12850Sstevel@tonic-gate 		/* Insert it in the Bind Hash */
12861676Sjpk 		ASSERT(connp->conn_zoneid != ALL_ZONES);
12873448Sdh155122 		connfp = &ipst->ips_ipcl_bind_fanout[
12883448Sdh155122 		    IPCL_BIND_HASH(lport, ipst)];
128911042SErik.Nordmark@Sun.COM 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
12900Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
12910Sstevel@tonic-gate 		} else {
12920Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
12930Sstevel@tonic-gate 		}
12940Sstevel@tonic-gate 		if (cl_inet_listen != NULL) {
12950Sstevel@tonic-gate 			sa_family_t	addr_family;
12960Sstevel@tonic-gate 			uint8_t		*laddrp;
12970Sstevel@tonic-gate 
129811042SErik.Nordmark@Sun.COM 			if (connp->conn_ipversion == IPV6_VERSION) {
12990Sstevel@tonic-gate 				addr_family = AF_INET6;
13000Sstevel@tonic-gate 				laddrp =
130111042SErik.Nordmark@Sun.COM 				    (uint8_t *)&connp->conn_bound_addr_v6;
13020Sstevel@tonic-gate 			} else {
13030Sstevel@tonic-gate 				addr_family = AF_INET;
130411042SErik.Nordmark@Sun.COM 				laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
13050Sstevel@tonic-gate 			}
13060Sstevel@tonic-gate 			connp->conn_flags |= IPCL_CL_LISTENER;
13078392SHuafeng.Lv@Sun.COM 			(*cl_inet_listen)(
13088392SHuafeng.Lv@Sun.COM 			    connp->conn_netstack->netstack_stackid,
13098392SHuafeng.Lv@Sun.COM 			    IPPROTO_TCP, addr_family, laddrp, lport, NULL);
13100Sstevel@tonic-gate 		}
13110Sstevel@tonic-gate 		break;
13120Sstevel@tonic-gate 
13130Sstevel@tonic-gate 	case IPPROTO_SCTP:
13140Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
13150Sstevel@tonic-gate 		break;
13160Sstevel@tonic-gate 	}
13170Sstevel@tonic-gate 
13180Sstevel@tonic-gate 	return (ret);
13190Sstevel@tonic-gate }
13200Sstevel@tonic-gate 
13210Sstevel@tonic-gate /*
13220Sstevel@tonic-gate  * ipcl_conn_hash insertion routines.
132311042SErik.Nordmark@Sun.COM  * The caller has already set conn_proto and the addresses/ports in the conn_t.
13240Sstevel@tonic-gate  */
132511042SErik.Nordmark@Sun.COM 
13260Sstevel@tonic-gate int
132711042SErik.Nordmark@Sun.COM ipcl_conn_insert(conn_t *connp)
132811042SErik.Nordmark@Sun.COM {
132911042SErik.Nordmark@Sun.COM 	if (connp->conn_ipversion == IPV6_VERSION)
133011042SErik.Nordmark@Sun.COM 		return (ipcl_conn_insert_v6(connp));
133111042SErik.Nordmark@Sun.COM 	else
133211042SErik.Nordmark@Sun.COM 		return (ipcl_conn_insert_v4(connp));
133311042SErik.Nordmark@Sun.COM }
133411042SErik.Nordmark@Sun.COM 
133511042SErik.Nordmark@Sun.COM int
133611042SErik.Nordmark@Sun.COM ipcl_conn_insert_v4(conn_t *connp)
13370Sstevel@tonic-gate {
13380Sstevel@tonic-gate 	connf_t		*connfp;
13390Sstevel@tonic-gate 	conn_t		*tconnp;
13400Sstevel@tonic-gate 	int		ret = 0;
13413448Sdh155122 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
134211042SErik.Nordmark@Sun.COM 	uint16_t	lport = connp->conn_lport;
134311042SErik.Nordmark@Sun.COM 	uint8_t		protocol = connp->conn_proto;
13440Sstevel@tonic-gate 
134510616SSebastien.Roy@Sun.COM 	if (IPCL_IS_IPTUN(connp))
134611042SErik.Nordmark@Sun.COM 		return (ipcl_iptun_hash_insert(connp, ipst));
134710616SSebastien.Roy@Sun.COM 
13480Sstevel@tonic-gate 	switch (protocol) {
13490Sstevel@tonic-gate 	case IPPROTO_TCP:
13508432SJonathan.Anderson@Sun.COM 		/*
135111042SErik.Nordmark@Sun.COM 		 * For TCP, we check whether the connection tuple already
13528432SJonathan.Anderson@Sun.COM 		 * exists before allowing the connection to proceed.  We
13538432SJonathan.Anderson@Sun.COM 		 * also allow indexing on the zoneid. This is to allow
13548432SJonathan.Anderson@Sun.COM 		 * multiple shared stack zones to have the same tcp
13558432SJonathan.Anderson@Sun.COM 		 * connection tuple. In practice this only happens for
13568432SJonathan.Anderson@Sun.COM 		 * INADDR_LOOPBACK as it's the only local address which
13578432SJonathan.Anderson@Sun.COM 		 * doesn't have to be unique.
13588432SJonathan.Anderson@Sun.COM 		 */
13593448Sdh155122 		connfp = &ipst->ips_ipcl_conn_fanout[
136011042SErik.Nordmark@Sun.COM 		    IPCL_CONN_HASH(connp->conn_faddr_v4,
13613448Sdh155122 		    connp->conn_ports, ipst)];
13620Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
13630Sstevel@tonic-gate 		for (tconnp = connfp->connf_head; tconnp != NULL;
13640Sstevel@tonic-gate 		    tconnp = tconnp->conn_next) {
136511042SErik.Nordmark@Sun.COM 			if (IPCL_CONN_MATCH(tconnp, connp->conn_proto,
136611042SErik.Nordmark@Sun.COM 			    connp->conn_faddr_v4, connp->conn_laddr_v4,
136711042SErik.Nordmark@Sun.COM 			    connp->conn_ports) &&
136811042SErik.Nordmark@Sun.COM 			    IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
13690Sstevel@tonic-gate 				/* Already have a conn. bail out */
13700Sstevel@tonic-gate 				mutex_exit(&connfp->connf_lock);
13710Sstevel@tonic-gate 				return (EADDRINUSE);
13720Sstevel@tonic-gate 			}
13730Sstevel@tonic-gate 		}
13740Sstevel@tonic-gate 		if (connp->conn_fanout != NULL) {
13750Sstevel@tonic-gate 			/*
13760Sstevel@tonic-gate 			 * Probably a XTI/TLI application trying to do a
13770Sstevel@tonic-gate 			 * rebind. Let it happen.
13780Sstevel@tonic-gate 			 */
13790Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
13800Sstevel@tonic-gate 			IPCL_HASH_REMOVE(connp);
13810Sstevel@tonic-gate 			mutex_enter(&connfp->connf_lock);
13820Sstevel@tonic-gate 		}
13833104Sjprakash 
13843104Sjprakash 		ASSERT(connp->conn_recv != NULL);
138511042SErik.Nordmark@Sun.COM 		ASSERT(connp->conn_recvicmp != NULL);
13863104Sjprakash 
13870Sstevel@tonic-gate 		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
13880Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
13890Sstevel@tonic-gate 		break;
13900Sstevel@tonic-gate 
13910Sstevel@tonic-gate 	case IPPROTO_SCTP:
1392409Skcpoon 		/*
1393409Skcpoon 		 * The raw socket may have already been bound, remove it
1394409Skcpoon 		 * from the hash first.
1395409Skcpoon 		 */
1396409Skcpoon 		IPCL_HASH_REMOVE(connp);
13970Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
13980Sstevel@tonic-gate 		break;
13990Sstevel@tonic-gate 
14001676Sjpk 	default:
14011676Sjpk 		/*
14021676Sjpk 		 * Check for conflicts among MAC exempt bindings.  For
14031676Sjpk 		 * transports with port numbers, this is done by the upper
14041676Sjpk 		 * level per-transport binding logic.  For all others, it's
14051676Sjpk 		 * done here.
14061676Sjpk 		 */
14073448Sdh155122 		if (is_system_labeled() &&
14083448Sdh155122 		    check_exempt_conflict_v4(connp, ipst))
14091676Sjpk 			return (EADDRINUSE);
14101676Sjpk 		/* FALLTHROUGH */
14111676Sjpk 
14120Sstevel@tonic-gate 	case IPPROTO_UDP:
14130Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
14143448Sdh155122 			connfp = &ipst->ips_ipcl_udp_fanout[
141511042SErik.Nordmark@Sun.COM 			    IPCL_UDP_HASH(lport, ipst)];
14160Sstevel@tonic-gate 		} else {
141711042SErik.Nordmark@Sun.COM 			connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
14180Sstevel@tonic-gate 		}
14190Sstevel@tonic-gate 
142011042SErik.Nordmark@Sun.COM 		if (connp->conn_faddr_v4 != INADDR_ANY) {
14210Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
142211042SErik.Nordmark@Sun.COM 		} else if (connp->conn_laddr_v4 != INADDR_ANY) {
14230Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
14240Sstevel@tonic-gate 		} else {
14250Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
14260Sstevel@tonic-gate 		}
14270Sstevel@tonic-gate 		break;
14280Sstevel@tonic-gate 	}
14290Sstevel@tonic-gate 
14300Sstevel@tonic-gate 	return (ret);
14310Sstevel@tonic-gate }
14320Sstevel@tonic-gate 
14330Sstevel@tonic-gate int
143411042SErik.Nordmark@Sun.COM ipcl_conn_insert_v6(conn_t *connp)
14350Sstevel@tonic-gate {
14360Sstevel@tonic-gate 	connf_t		*connfp;
14370Sstevel@tonic-gate 	conn_t		*tconnp;
14380Sstevel@tonic-gate 	int		ret = 0;
14393448Sdh155122 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
144011042SErik.Nordmark@Sun.COM 	uint16_t	lport = connp->conn_lport;
144111042SErik.Nordmark@Sun.COM 	uint8_t		protocol = connp->conn_proto;
144211042SErik.Nordmark@Sun.COM 	uint_t		ifindex = connp->conn_bound_if;
14430Sstevel@tonic-gate 
144410616SSebastien.Roy@Sun.COM 	if (IPCL_IS_IPTUN(connp))
144511042SErik.Nordmark@Sun.COM 		return (ipcl_iptun_hash_insert_v6(connp, ipst));
144610616SSebastien.Roy@Sun.COM 
14470Sstevel@tonic-gate 	switch (protocol) {
14480Sstevel@tonic-gate 	case IPPROTO_TCP:
14498432SJonathan.Anderson@Sun.COM 
14508432SJonathan.Anderson@Sun.COM 		/*
14518432SJonathan.Anderson@Sun.COM 		 * For tcp, we check whether the connection tuple already
14528432SJonathan.Anderson@Sun.COM 		 * exists before allowing the connection to proceed.  We
14538432SJonathan.Anderson@Sun.COM 		 * also allow indexing on the zoneid. This is to allow
14548432SJonathan.Anderson@Sun.COM 		 * multiple shared stack zones to have the same tcp
14558432SJonathan.Anderson@Sun.COM 		 * connection tuple. In practice this only happens for
14568432SJonathan.Anderson@Sun.COM 		 * ipv6_loopback as it's the only local address which
14578432SJonathan.Anderson@Sun.COM 		 * doesn't have to be unique.
14588432SJonathan.Anderson@Sun.COM 		 */
14593448Sdh155122 		connfp = &ipst->ips_ipcl_conn_fanout[
146011042SErik.Nordmark@Sun.COM 		    IPCL_CONN_HASH_V6(connp->conn_faddr_v6, connp->conn_ports,
14613448Sdh155122 		    ipst)];
14620Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
14630Sstevel@tonic-gate 		for (tconnp = connfp->connf_head; tconnp != NULL;
14640Sstevel@tonic-gate 		    tconnp = tconnp->conn_next) {
146511042SErik.Nordmark@Sun.COM 			/* NOTE: need to match zoneid. Bug in onnv-gate */
146611042SErik.Nordmark@Sun.COM 			if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_proto,
146711042SErik.Nordmark@Sun.COM 			    connp->conn_faddr_v6, connp->conn_laddr_v6,
14680Sstevel@tonic-gate 			    connp->conn_ports) &&
146911042SErik.Nordmark@Sun.COM 			    (tconnp->conn_bound_if == 0 ||
147011042SErik.Nordmark@Sun.COM 			    tconnp->conn_bound_if == ifindex) &&
147111042SErik.Nordmark@Sun.COM 			    IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
14720Sstevel@tonic-gate 				/* Already have a conn. bail out */
14730Sstevel@tonic-gate 				mutex_exit(&connfp->connf_lock);
14740Sstevel@tonic-gate 				return (EADDRINUSE);
14750Sstevel@tonic-gate 			}
14760Sstevel@tonic-gate 		}
14770Sstevel@tonic-gate 		if (connp->conn_fanout != NULL) {
14780Sstevel@tonic-gate 			/*
14790Sstevel@tonic-gate 			 * Probably a XTI/TLI application trying to do a
14800Sstevel@tonic-gate 			 * rebind. Let it happen.
14810Sstevel@tonic-gate 			 */
14820Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
14830Sstevel@tonic-gate 			IPCL_HASH_REMOVE(connp);
14840Sstevel@tonic-gate 			mutex_enter(&connfp->connf_lock);
14850Sstevel@tonic-gate 		}
14860Sstevel@tonic-gate 		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
14870Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
14880Sstevel@tonic-gate 		break;
14890Sstevel@tonic-gate 
14900Sstevel@tonic-gate 	case IPPROTO_SCTP:
1491409Skcpoon 		IPCL_HASH_REMOVE(connp);
14920Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
14930Sstevel@tonic-gate 		break;
14940Sstevel@tonic-gate 
14951676Sjpk 	default:
14963448Sdh155122 		if (is_system_labeled() &&
14973448Sdh155122 		    check_exempt_conflict_v6(connp, ipst))
14981676Sjpk 			return (EADDRINUSE);
14991676Sjpk 		/* FALLTHROUGH */
15000Sstevel@tonic-gate 	case IPPROTO_UDP:
15010Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
15023448Sdh155122 			connfp = &ipst->ips_ipcl_udp_fanout[
150311042SErik.Nordmark@Sun.COM 			    IPCL_UDP_HASH(lport, ipst)];
15040Sstevel@tonic-gate 		} else {
15053448Sdh155122 			connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
15060Sstevel@tonic-gate 		}
15070Sstevel@tonic-gate 
150811042SErik.Nordmark@Sun.COM 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
15090Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
151011042SErik.Nordmark@Sun.COM 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
15110Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
15120Sstevel@tonic-gate 		} else {
15130Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
15140Sstevel@tonic-gate 		}
15150Sstevel@tonic-gate 		break;
15160Sstevel@tonic-gate 	}
15170Sstevel@tonic-gate 
15180Sstevel@tonic-gate 	return (ret);
15190Sstevel@tonic-gate }
15200Sstevel@tonic-gate 
15210Sstevel@tonic-gate /*
15220Sstevel@tonic-gate  * v4 packet classifying function. looks up the fanout table to
15230Sstevel@tonic-gate  * find the conn, the packet belongs to. returns the conn with
15240Sstevel@tonic-gate  * the reference held, null otherwise.
15251676Sjpk  *
15261676Sjpk  * If zoneid is ALL_ZONES, then the search rules described in the "Connection
15271676Sjpk  * Lookup" comment block are applied.  Labels are also checked as described
15281676Sjpk  * above.  If the packet is from the inside (looped back), and is from the same
15291676Sjpk  * zone, then label checks are omitted.
15300Sstevel@tonic-gate  */
15310Sstevel@tonic-gate conn_t *
153211042SErik.Nordmark@Sun.COM ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
153311042SErik.Nordmark@Sun.COM     ip_recv_attr_t *ira, ip_stack_t *ipst)
15340Sstevel@tonic-gate {
15350Sstevel@tonic-gate 	ipha_t	*ipha;
15360Sstevel@tonic-gate 	connf_t	*connfp, *bind_connfp;
15370Sstevel@tonic-gate 	uint16_t lport;
15380Sstevel@tonic-gate 	uint16_t fport;
15390Sstevel@tonic-gate 	uint32_t ports;
15400Sstevel@tonic-gate 	conn_t	*connp;
15410Sstevel@tonic-gate 	uint16_t  *up;
154211042SErik.Nordmark@Sun.COM 	zoneid_t	zoneid = ira->ira_zoneid;
15430Sstevel@tonic-gate 
15440Sstevel@tonic-gate 	ipha = (ipha_t *)mp->b_rptr;
15450Sstevel@tonic-gate 	up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET);
15460Sstevel@tonic-gate 
15470Sstevel@tonic-gate 	switch (protocol) {
15480Sstevel@tonic-gate 	case IPPROTO_TCP:
15490Sstevel@tonic-gate 		ports = *(uint32_t *)up;
15500Sstevel@tonic-gate 		connfp =
15513448Sdh155122 		    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src,
15523448Sdh155122 		    ports, ipst)];
15530Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
15540Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
15550Sstevel@tonic-gate 		    connp = connp->conn_next) {
155611042SErik.Nordmark@Sun.COM 			if (IPCL_CONN_MATCH(connp, protocol,
155711042SErik.Nordmark@Sun.COM 			    ipha->ipha_src, ipha->ipha_dst, ports) &&
155811042SErik.Nordmark@Sun.COM 			    (connp->conn_zoneid == zoneid ||
155911042SErik.Nordmark@Sun.COM 			    connp->conn_allzones ||
156011042SErik.Nordmark@Sun.COM 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
156111042SErik.Nordmark@Sun.COM 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
156211042SErik.Nordmark@Sun.COM 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
15630Sstevel@tonic-gate 				break;
15640Sstevel@tonic-gate 		}
15650Sstevel@tonic-gate 
15660Sstevel@tonic-gate 		if (connp != NULL) {
15671676Sjpk 			/*
15681676Sjpk 			 * We have a fully-bound TCP connection.
15691676Sjpk 			 *
15701676Sjpk 			 * For labeled systems, there's no need to check the
15711676Sjpk 			 * label here.  It's known to be good as we checked
15721676Sjpk 			 * before allowing the connection to become bound.
15731676Sjpk 			 */
15740Sstevel@tonic-gate 			CONN_INC_REF(connp);
15750Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
15760Sstevel@tonic-gate 			return (connp);
15770Sstevel@tonic-gate 		}
15780Sstevel@tonic-gate 
15790Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
15800Sstevel@tonic-gate 		lport = up[1];
15813448Sdh155122 		bind_connfp =
15823448Sdh155122 		    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
15830Sstevel@tonic-gate 		mutex_enter(&bind_connfp->connf_lock);
15840Sstevel@tonic-gate 		for (connp = bind_connfp->connf_head; connp != NULL;
15850Sstevel@tonic-gate 		    connp = connp->conn_next) {
15861676Sjpk 			if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
158711042SErik.Nordmark@Sun.COM 			    lport) &&
158811042SErik.Nordmark@Sun.COM 			    (connp->conn_zoneid == zoneid ||
158911042SErik.Nordmark@Sun.COM 			    connp->conn_allzones ||
159011042SErik.Nordmark@Sun.COM 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
159111042SErik.Nordmark@Sun.COM 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
159211042SErik.Nordmark@Sun.COM 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
15930Sstevel@tonic-gate 				break;
15940Sstevel@tonic-gate 		}
15950Sstevel@tonic-gate 
15961676Sjpk 		/*
15971676Sjpk 		 * If the matching connection is SLP on a private address, then
15981676Sjpk 		 * the label on the packet must match the local zone's label.
15991676Sjpk 		 * Otherwise, it must be in the label range defined by tnrh.
160011042SErik.Nordmark@Sun.COM 		 * This is ensured by tsol_receive_local.
160111042SErik.Nordmark@Sun.COM 		 *
160211042SErik.Nordmark@Sun.COM 		 * Note that we don't check tsol_receive_local for
160311042SErik.Nordmark@Sun.COM 		 * the connected case.
16041676Sjpk 		 */
160511042SErik.Nordmark@Sun.COM 		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
16061676Sjpk 		    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
160711042SErik.Nordmark@Sun.COM 		    ira, connp)) {
160811042SErik.Nordmark@Sun.COM 			DTRACE_PROBE3(tx__ip__log__info__classify__tcp,
160911042SErik.Nordmark@Sun.COM 			    char *, "connp(1) could not receive mp(2)",
161011042SErik.Nordmark@Sun.COM 			    conn_t *, connp, mblk_t *, mp);
16111676Sjpk 			connp = NULL;
16121676Sjpk 		}
16131676Sjpk 
16140Sstevel@tonic-gate 		if (connp != NULL) {
16151676Sjpk 			/* Have a listener at least */
16160Sstevel@tonic-gate 			CONN_INC_REF(connp);
16170Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
16180Sstevel@tonic-gate 			return (connp);
16190Sstevel@tonic-gate 		}
16200Sstevel@tonic-gate 
16210Sstevel@tonic-gate 		mutex_exit(&bind_connfp->connf_lock);
16220Sstevel@tonic-gate 		break;
16230Sstevel@tonic-gate 
16240Sstevel@tonic-gate 	case IPPROTO_UDP:
16250Sstevel@tonic-gate 		lport = up[1];
16260Sstevel@tonic-gate 		fport = up[0];
16273448Sdh155122 		connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
16280Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
16290Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
16300Sstevel@tonic-gate 		    connp = connp->conn_next) {
16310Sstevel@tonic-gate 			if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
16320Sstevel@tonic-gate 			    fport, ipha->ipha_src) &&
163311042SErik.Nordmark@Sun.COM 			    (connp->conn_zoneid == zoneid ||
163411042SErik.Nordmark@Sun.COM 			    connp->conn_allzones ||
163511042SErik.Nordmark@Sun.COM 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
163611042SErik.Nordmark@Sun.COM 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE))))
16370Sstevel@tonic-gate 				break;
16380Sstevel@tonic-gate 		}
16390Sstevel@tonic-gate 
164011042SErik.Nordmark@Sun.COM 		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
16411676Sjpk 		    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
164211042SErik.Nordmark@Sun.COM 		    ira, connp)) {
16431676Sjpk 			DTRACE_PROBE3(tx__ip__log__info__classify__udp,
16441676Sjpk 			    char *, "connp(1) could not receive mp(2)",
16451676Sjpk 			    conn_t *, connp, mblk_t *, mp);
16461676Sjpk 			connp = NULL;
16471676Sjpk 		}
16481676Sjpk 
16490Sstevel@tonic-gate 		if (connp != NULL) {
16500Sstevel@tonic-gate 			CONN_INC_REF(connp);
16510Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
16520Sstevel@tonic-gate 			return (connp);
16530Sstevel@tonic-gate 		}
16540Sstevel@tonic-gate 
16550Sstevel@tonic-gate 		/*
16560Sstevel@tonic-gate 		 * We shouldn't come here for multicast/broadcast packets
16570Sstevel@tonic-gate 		 */
16580Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
165911042SErik.Nordmark@Sun.COM 
16600Sstevel@tonic-gate 		break;
166110616SSebastien.Roy@Sun.COM 
166210616SSebastien.Roy@Sun.COM 	case IPPROTO_ENCAP:
166310616SSebastien.Roy@Sun.COM 	case IPPROTO_IPV6:
166410616SSebastien.Roy@Sun.COM 		return (ipcl_iptun_classify_v4(&ipha->ipha_src,
166510616SSebastien.Roy@Sun.COM 		    &ipha->ipha_dst, ipst));
16660Sstevel@tonic-gate 	}
16670Sstevel@tonic-gate 
16680Sstevel@tonic-gate 	return (NULL);
16690Sstevel@tonic-gate }
16700Sstevel@tonic-gate 
16710Sstevel@tonic-gate conn_t *
167211042SErik.Nordmark@Sun.COM ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
167311042SErik.Nordmark@Sun.COM     ip_recv_attr_t *ira, ip_stack_t *ipst)
16740Sstevel@tonic-gate {
16750Sstevel@tonic-gate 	ip6_t		*ip6h;
16760Sstevel@tonic-gate 	connf_t		*connfp, *bind_connfp;
16770Sstevel@tonic-gate 	uint16_t	lport;
16780Sstevel@tonic-gate 	uint16_t	fport;
167911042SErik.Nordmark@Sun.COM 	tcpha_t		*tcpha;
16800Sstevel@tonic-gate 	uint32_t	ports;
16810Sstevel@tonic-gate 	conn_t		*connp;
16820Sstevel@tonic-gate 	uint16_t	*up;
168311042SErik.Nordmark@Sun.COM 	zoneid_t	zoneid = ira->ira_zoneid;
16840Sstevel@tonic-gate 
16850Sstevel@tonic-gate 	ip6h = (ip6_t *)mp->b_rptr;
16860Sstevel@tonic-gate 
16870Sstevel@tonic-gate 	switch (protocol) {
16880Sstevel@tonic-gate 	case IPPROTO_TCP:
168911042SErik.Nordmark@Sun.COM 		tcpha = (tcpha_t *)&mp->b_rptr[hdr_len];
169011042SErik.Nordmark@Sun.COM 		up = &tcpha->tha_lport;
16910Sstevel@tonic-gate 		ports = *(uint32_t *)up;
16920Sstevel@tonic-gate 
16930Sstevel@tonic-gate 		connfp =
16943448Sdh155122 		    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src,
16953448Sdh155122 		    ports, ipst)];
16960Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
16970Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
16980Sstevel@tonic-gate 		    connp = connp->conn_next) {
169911042SErik.Nordmark@Sun.COM 			if (IPCL_CONN_MATCH_V6(connp, protocol,
170011042SErik.Nordmark@Sun.COM 			    ip6h->ip6_src, ip6h->ip6_dst, ports) &&
170111042SErik.Nordmark@Sun.COM 			    (connp->conn_zoneid == zoneid ||
170211042SErik.Nordmark@Sun.COM 			    connp->conn_allzones ||
170311042SErik.Nordmark@Sun.COM 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
170411042SErik.Nordmark@Sun.COM 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
170511042SErik.Nordmark@Sun.COM 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
17060Sstevel@tonic-gate 				break;
17070Sstevel@tonic-gate 		}
17080Sstevel@tonic-gate 
17090Sstevel@tonic-gate 		if (connp != NULL) {
17101676Sjpk 			/*
17111676Sjpk 			 * We have a fully-bound TCP connection.
17121676Sjpk 			 *
17131676Sjpk 			 * For labeled systems, there's no need to check the
17141676Sjpk 			 * label here.  It's known to be good as we checked
17151676Sjpk 			 * before allowing the connection to become bound.
17161676Sjpk 			 */
17170Sstevel@tonic-gate 			CONN_INC_REF(connp);
17180Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
17190Sstevel@tonic-gate 			return (connp);
17200Sstevel@tonic-gate 		}
17210Sstevel@tonic-gate 
17220Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
17230Sstevel@tonic-gate 
17240Sstevel@tonic-gate 		lport = up[1];
17253448Sdh155122 		bind_connfp =
17263448Sdh155122 		    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
17270Sstevel@tonic-gate 		mutex_enter(&bind_connfp->connf_lock);
17280Sstevel@tonic-gate 		for (connp = bind_connfp->connf_head; connp != NULL;
17290Sstevel@tonic-gate 		    connp = connp->conn_next) {
17300Sstevel@tonic-gate 			if (IPCL_BIND_MATCH_V6(connp, protocol,
17310Sstevel@tonic-gate 			    ip6h->ip6_dst, lport) &&
173211042SErik.Nordmark@Sun.COM 			    (connp->conn_zoneid == zoneid ||
173311042SErik.Nordmark@Sun.COM 			    connp->conn_allzones ||
173411042SErik.Nordmark@Sun.COM 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
173511042SErik.Nordmark@Sun.COM 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
173611042SErik.Nordmark@Sun.COM 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
17370Sstevel@tonic-gate 				break;
17380Sstevel@tonic-gate 		}
17390Sstevel@tonic-gate 
174011042SErik.Nordmark@Sun.COM 		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
17411676Sjpk 		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
174211042SErik.Nordmark@Sun.COM 		    ira, connp)) {
17431676Sjpk 			DTRACE_PROBE3(tx__ip__log__info__classify__tcp6,
17441676Sjpk 			    char *, "connp(1) could not receive mp(2)",
17451676Sjpk 			    conn_t *, connp, mblk_t *, mp);
17461676Sjpk 			connp = NULL;
17471676Sjpk 		}
17481676Sjpk 
17490Sstevel@tonic-gate 		if (connp != NULL) {
17500Sstevel@tonic-gate 			/* Have a listner at least */
17510Sstevel@tonic-gate 			CONN_INC_REF(connp);
17520Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
17530Sstevel@tonic-gate 			return (connp);
17540Sstevel@tonic-gate 		}
17550Sstevel@tonic-gate 
17560Sstevel@tonic-gate 		mutex_exit(&bind_connfp->connf_lock);
17570Sstevel@tonic-gate 		break;
17580Sstevel@tonic-gate 
17590Sstevel@tonic-gate 	case IPPROTO_UDP:
17600Sstevel@tonic-gate 		up = (uint16_t *)&mp->b_rptr[hdr_len];
17610Sstevel@tonic-gate 		lport = up[1];
17620Sstevel@tonic-gate 		fport = up[0];
17633448Sdh155122 		connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
17640Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
17650Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
17660Sstevel@tonic-gate 		    connp = connp->conn_next) {
17670Sstevel@tonic-gate 			if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst,
17680Sstevel@tonic-gate 			    fport, ip6h->ip6_src) &&
176911042SErik.Nordmark@Sun.COM 			    (connp->conn_zoneid == zoneid ||
177011042SErik.Nordmark@Sun.COM 			    connp->conn_allzones ||
177111042SErik.Nordmark@Sun.COM 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
177211042SErik.Nordmark@Sun.COM 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
177311042SErik.Nordmark@Sun.COM 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
17740Sstevel@tonic-gate 				break;
17750Sstevel@tonic-gate 		}
17760Sstevel@tonic-gate 
177711042SErik.Nordmark@Sun.COM 		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
17781676Sjpk 		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
177911042SErik.Nordmark@Sun.COM 		    ira, connp)) {
17801676Sjpk 			DTRACE_PROBE3(tx__ip__log__info__classify__udp6,
17811676Sjpk 			    char *, "connp(1) could not receive mp(2)",
17821676Sjpk 			    conn_t *, connp, mblk_t *, mp);
17831676Sjpk 			connp = NULL;
17841676Sjpk 		}
17851676Sjpk 
17860Sstevel@tonic-gate 		if (connp != NULL) {
17870Sstevel@tonic-gate 			CONN_INC_REF(connp);
17880Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
17890Sstevel@tonic-gate 			return (connp);
17900Sstevel@tonic-gate 		}
17910Sstevel@tonic-gate 
17920Sstevel@tonic-gate 		/*
17930Sstevel@tonic-gate 		 * We shouldn't come here for multicast/broadcast packets
17940Sstevel@tonic-gate 		 */
17950Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
17960Sstevel@tonic-gate 		break;
179710616SSebastien.Roy@Sun.COM 	case IPPROTO_ENCAP:
179810616SSebastien.Roy@Sun.COM 	case IPPROTO_IPV6:
179910616SSebastien.Roy@Sun.COM 		return (ipcl_iptun_classify_v6(&ip6h->ip6_src,
180010616SSebastien.Roy@Sun.COM 		    &ip6h->ip6_dst, ipst));
18010Sstevel@tonic-gate 	}
18020Sstevel@tonic-gate 
18030Sstevel@tonic-gate 	return (NULL);
18040Sstevel@tonic-gate }
18050Sstevel@tonic-gate 
18060Sstevel@tonic-gate /*
18070Sstevel@tonic-gate  * wrapper around ipcl_classify_(v4,v6) routines.
18080Sstevel@tonic-gate  */
18090Sstevel@tonic-gate conn_t *
181011042SErik.Nordmark@Sun.COM ipcl_classify(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
18110Sstevel@tonic-gate {
181211042SErik.Nordmark@Sun.COM 	if (ira->ira_flags & IRAF_IS_IPV4) {
181311042SErik.Nordmark@Sun.COM 		return (ipcl_classify_v4(mp, ira->ira_protocol,
181411042SErik.Nordmark@Sun.COM 		    ira->ira_ip_hdr_length, ira, ipst));
181511042SErik.Nordmark@Sun.COM 	} else {
181611042SErik.Nordmark@Sun.COM 		return (ipcl_classify_v6(mp, ira->ira_protocol,
181711042SErik.Nordmark@Sun.COM 		    ira->ira_ip_hdr_length, ira, ipst));
18180Sstevel@tonic-gate 	}
18190Sstevel@tonic-gate }
18200Sstevel@tonic-gate 
182111042SErik.Nordmark@Sun.COM /*
182211042SErik.Nordmark@Sun.COM  * Only used to classify SCTP RAW sockets
182311042SErik.Nordmark@Sun.COM  */
18240Sstevel@tonic-gate conn_t *
182511042SErik.Nordmark@Sun.COM ipcl_classify_raw(mblk_t *mp, uint8_t protocol, uint32_t ports,
182611042SErik.Nordmark@Sun.COM     ipha_t *ipha, ip6_t *ip6h, ip_recv_attr_t *ira, ip_stack_t *ipst)
18270Sstevel@tonic-gate {
18281676Sjpk 	connf_t		*connfp;
18290Sstevel@tonic-gate 	conn_t		*connp;
18300Sstevel@tonic-gate 	in_port_t	lport;
183111042SErik.Nordmark@Sun.COM 	int		ipversion;
18321676Sjpk 	const void	*dst;
183311042SErik.Nordmark@Sun.COM 	zoneid_t	zoneid = ira->ira_zoneid;
18340Sstevel@tonic-gate 
18350Sstevel@tonic-gate 	lport = ((uint16_t *)&ports)[1];
183611042SErik.Nordmark@Sun.COM 	if (ira->ira_flags & IRAF_IS_IPV4) {
183711042SErik.Nordmark@Sun.COM 		dst = (const void *)&ipha->ipha_dst;
183811042SErik.Nordmark@Sun.COM 		ipversion = IPV4_VERSION;
183911042SErik.Nordmark@Sun.COM 	} else {
184011042SErik.Nordmark@Sun.COM 		dst = (const void *)&ip6h->ip6_dst;
184111042SErik.Nordmark@Sun.COM 		ipversion = IPV6_VERSION;
18421676Sjpk 	}
18431676Sjpk 
18443448Sdh155122 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
18450Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
18460Sstevel@tonic-gate 	for (connp = connfp->connf_head; connp != NULL;
18470Sstevel@tonic-gate 	    connp = connp->conn_next) {
18480Sstevel@tonic-gate 		/* We don't allow v4 fallback for v6 raw socket. */
184911042SErik.Nordmark@Sun.COM 		if (ipversion != connp->conn_ipversion)
18500Sstevel@tonic-gate 			continue;
185111042SErik.Nordmark@Sun.COM 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
185211042SErik.Nordmark@Sun.COM 		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
185311042SErik.Nordmark@Sun.COM 			if (ipversion == IPV4_VERSION) {
18541676Sjpk 				if (!IPCL_CONN_MATCH(connp, protocol,
185511042SErik.Nordmark@Sun.COM 				    ipha->ipha_src, ipha->ipha_dst, ports))
18561676Sjpk 					continue;
18570Sstevel@tonic-gate 			} else {
18581676Sjpk 				if (!IPCL_CONN_MATCH_V6(connp, protocol,
185911042SErik.Nordmark@Sun.COM 				    ip6h->ip6_src, ip6h->ip6_dst, ports))
18601676Sjpk 					continue;
18610Sstevel@tonic-gate 			}
18620Sstevel@tonic-gate 		} else {
186311042SErik.Nordmark@Sun.COM 			if (ipversion == IPV4_VERSION) {
18641676Sjpk 				if (!IPCL_BIND_MATCH(connp, protocol,
186511042SErik.Nordmark@Sun.COM 				    ipha->ipha_dst, lport))
18661676Sjpk 					continue;
18670Sstevel@tonic-gate 			} else {
18681676Sjpk 				if (!IPCL_BIND_MATCH_V6(connp, protocol,
186911042SErik.Nordmark@Sun.COM 				    ip6h->ip6_dst, lport))
18701676Sjpk 					continue;
18710Sstevel@tonic-gate 			}
18720Sstevel@tonic-gate 		}
18731676Sjpk 
187411042SErik.Nordmark@Sun.COM 		if (connp->conn_zoneid == zoneid ||
187511042SErik.Nordmark@Sun.COM 		    connp->conn_allzones ||
187611042SErik.Nordmark@Sun.COM 		    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
187711042SErik.Nordmark@Sun.COM 		    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
187811042SErik.Nordmark@Sun.COM 		    (ira->ira_flags & IRAF_TX_SHARED_ADDR)))
18791676Sjpk 			break;
18801676Sjpk 	}
188111042SErik.Nordmark@Sun.COM 
188211042SErik.Nordmark@Sun.COM 	if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
188311042SErik.Nordmark@Sun.COM 	    !tsol_receive_local(mp, dst, ipversion, ira, connp)) {
18841676Sjpk 		DTRACE_PROBE3(tx__ip__log__info__classify__rawip,
18851676Sjpk 		    char *, "connp(1) could not receive mp(2)",
18861676Sjpk 		    conn_t *, connp, mblk_t *, mp);
18871676Sjpk 		connp = NULL;
18880Sstevel@tonic-gate 	}
1889409Skcpoon 
1890409Skcpoon 	if (connp != NULL)
1891409Skcpoon 		goto found;
1892409Skcpoon 	mutex_exit(&connfp->connf_lock);
1893409Skcpoon 
189411042SErik.Nordmark@Sun.COM 	/* Try to look for a wildcard SCTP RAW socket match. */
18953448Sdh155122 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)];
1896409Skcpoon 	mutex_enter(&connfp->connf_lock);
1897409Skcpoon 	for (connp = connfp->connf_head; connp != NULL;
1898409Skcpoon 	    connp = connp->conn_next) {
1899409Skcpoon 		/* We don't allow v4 fallback for v6 raw socket. */
190011042SErik.Nordmark@Sun.COM 		if (ipversion != connp->conn_ipversion)
190111042SErik.Nordmark@Sun.COM 			continue;
190211042SErik.Nordmark@Sun.COM 		if (!IPCL_ZONE_MATCH(connp, zoneid))
1903409Skcpoon 			continue;
190411042SErik.Nordmark@Sun.COM 
190511042SErik.Nordmark@Sun.COM 		if (ipversion == IPV4_VERSION) {
190611042SErik.Nordmark@Sun.COM 			if (IPCL_RAW_MATCH(connp, protocol, ipha->ipha_dst))
1907409Skcpoon 				break;
1908409Skcpoon 		} else {
190911042SErik.Nordmark@Sun.COM 			if (IPCL_RAW_MATCH_V6(connp, protocol, ip6h->ip6_dst)) {
1910409Skcpoon 				break;
1911409Skcpoon 			}
1912409Skcpoon 		}
19130Sstevel@tonic-gate 	}
1914409Skcpoon 
1915409Skcpoon 	if (connp != NULL)
1916409Skcpoon 		goto found;
1917409Skcpoon 
19180Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
19190Sstevel@tonic-gate 	return (NULL);
1920409Skcpoon 
1921409Skcpoon found:
1922409Skcpoon 	ASSERT(connp != NULL);
1923409Skcpoon 	CONN_INC_REF(connp);
1924409Skcpoon 	mutex_exit(&connfp->connf_lock);
1925409Skcpoon 	return (connp);
19260Sstevel@tonic-gate }
19270Sstevel@tonic-gate 
19280Sstevel@tonic-gate /* ARGSUSED */
19290Sstevel@tonic-gate static int
19305240Snordmark tcp_conn_constructor(void *buf, void *cdrarg, int kmflags)
19310Sstevel@tonic-gate {
19320Sstevel@tonic-gate 	itc_t	*itc = (itc_t *)buf;
19330Sstevel@tonic-gate 	conn_t 	*connp = &itc->itc_conn;
19345240Snordmark 	tcp_t	*tcp = (tcp_t *)&itc[1];
19355240Snordmark 
19365240Snordmark 	bzero(connp, sizeof (conn_t));
19375240Snordmark 	bzero(tcp, sizeof (tcp_t));
19385240Snordmark 
19395240Snordmark 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
19405240Snordmark 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
19418348SEric.Yu@Sun.COM 	cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL);
194211042SErik.Nordmark@Sun.COM 	tcp->tcp_timercache = tcp_timermp_alloc(kmflags);
194311042SErik.Nordmark@Sun.COM 	if (tcp->tcp_timercache == NULL)
194411042SErik.Nordmark@Sun.COM 		return (ENOMEM);
19450Sstevel@tonic-gate 	connp->conn_tcp = tcp;
19460Sstevel@tonic-gate 	connp->conn_flags = IPCL_TCPCONN;
194711042SErik.Nordmark@Sun.COM 	connp->conn_proto = IPPROTO_TCP;
19480Sstevel@tonic-gate 	tcp->tcp_connp = connp;
194911042SErik.Nordmark@Sun.COM 	rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
195011042SErik.Nordmark@Sun.COM 
195111042SErik.Nordmark@Sun.COM 	connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
195211042SErik.Nordmark@Sun.COM 	if (connp->conn_ixa == NULL) {
195311042SErik.Nordmark@Sun.COM 		tcp_timermp_free(tcp);
195411042SErik.Nordmark@Sun.COM 		return (ENOMEM);
195511042SErik.Nordmark@Sun.COM 	}
195611042SErik.Nordmark@Sun.COM 	connp->conn_ixa->ixa_refcnt = 1;
195711042SErik.Nordmark@Sun.COM 	connp->conn_ixa->ixa_protocol = connp->conn_proto;
195811042SErik.Nordmark@Sun.COM 	connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
19590Sstevel@tonic-gate 	return (0);
19600Sstevel@tonic-gate }
19610Sstevel@tonic-gate 
19620Sstevel@tonic-gate /* ARGSUSED */
19630Sstevel@tonic-gate static void
19645240Snordmark tcp_conn_destructor(void *buf, void *cdrarg)
19655240Snordmark {
19665240Snordmark 	itc_t	*itc = (itc_t *)buf;
19675240Snordmark 	conn_t 	*connp = &itc->itc_conn;
19685240Snordmark 	tcp_t	*tcp = (tcp_t *)&itc[1];
19695240Snordmark 
19705240Snordmark 	ASSERT(connp->conn_flags & IPCL_TCPCONN);
19715240Snordmark 	ASSERT(tcp->tcp_connp == connp);
19725240Snordmark 	ASSERT(connp->conn_tcp == tcp);
19735240Snordmark 	tcp_timermp_free(tcp);
19745240Snordmark 	mutex_destroy(&connp->conn_lock);
19755240Snordmark 	cv_destroy(&connp->conn_cv);
19768348SEric.Yu@Sun.COM 	cv_destroy(&connp->conn_sq_cv);
197711042SErik.Nordmark@Sun.COM 	rw_destroy(&connp->conn_ilg_lock);
197811042SErik.Nordmark@Sun.COM 
197911042SErik.Nordmark@Sun.COM 	/* Can be NULL if constructor failed */
198011042SErik.Nordmark@Sun.COM 	if (connp->conn_ixa != NULL) {
198111042SErik.Nordmark@Sun.COM 		ASSERT(connp->conn_ixa->ixa_refcnt == 1);
198211042SErik.Nordmark@Sun.COM 		ASSERT(connp->conn_ixa->ixa_ire == NULL);
198311042SErik.Nordmark@Sun.COM 		ASSERT(connp->conn_ixa->ixa_nce == NULL);
198411042SErik.Nordmark@Sun.COM 		ixa_refrele(connp->conn_ixa);
198511042SErik.Nordmark@Sun.COM 	}
19865240Snordmark }
19875240Snordmark 
19885240Snordmark /* ARGSUSED */
19895240Snordmark static int
19905240Snordmark ip_conn_constructor(void *buf, void *cdrarg, int kmflags)
19915240Snordmark {
19925240Snordmark 	itc_t	*itc = (itc_t *)buf;
19935240Snordmark 	conn_t 	*connp = &itc->itc_conn;
19945240Snordmark 
19955240Snordmark 	bzero(connp, sizeof (conn_t));
19965240Snordmark 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
19975240Snordmark 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
19985240Snordmark 	connp->conn_flags = IPCL_IPCCONN;
199911042SErik.Nordmark@Sun.COM 	rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
20005240Snordmark 
200111042SErik.Nordmark@Sun.COM 	connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
200211042SErik.Nordmark@Sun.COM 	if (connp->conn_ixa == NULL)
200311042SErik.Nordmark@Sun.COM 		return (ENOMEM);
200411042SErik.Nordmark@Sun.COM 	connp->conn_ixa->ixa_refcnt = 1;
200511042SErik.Nordmark@Sun.COM 	connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
20065240Snordmark 	return (0);
20075240Snordmark }
20085240Snordmark 
20095240Snordmark /* ARGSUSED */
20105240Snordmark static void
20115240Snordmark ip_conn_destructor(void *buf, void *cdrarg)
20125240Snordmark {
20135240Snordmark 	itc_t	*itc = (itc_t *)buf;
20145240Snordmark 	conn_t 	*connp = &itc->itc_conn;
20155240Snordmark 
20165240Snordmark 	ASSERT(connp->conn_flags & IPCL_IPCCONN);
20175240Snordmark 	ASSERT(connp->conn_priv == NULL);
20185240Snordmark 	mutex_destroy(&connp->conn_lock);
20195240Snordmark 	cv_destroy(&connp->conn_cv);
202011042SErik.Nordmark@Sun.COM 	rw_destroy(&connp->conn_ilg_lock);
202111042SErik.Nordmark@Sun.COM 
202211042SErik.Nordmark@Sun.COM 	/* Can be NULL if constructor failed */
202311042SErik.Nordmark@Sun.COM 	if (connp->conn_ixa != NULL) {
202411042SErik.Nordmark@Sun.COM 		ASSERT(connp->conn_ixa->ixa_refcnt == 1);
202511042SErik.Nordmark@Sun.COM 		ASSERT(connp->conn_ixa->ixa_ire == NULL);
202611042SErik.Nordmark@Sun.COM 		ASSERT(connp->conn_ixa->ixa_nce == NULL);
202711042SErik.Nordmark@Sun.COM 		ixa_refrele(connp->conn_ixa);
202811042SErik.Nordmark@Sun.COM 	}
20295240Snordmark }
20305240Snordmark 
20315240Snordmark /* ARGSUSED */
20325240Snordmark static int
20335240Snordmark udp_conn_constructor(void *buf, void *cdrarg, int kmflags)
20345240Snordmark {
20355240Snordmark 	itc_t	*itc = (itc_t *)buf;
20365240Snordmark 	conn_t 	*connp = &itc->itc_conn;
20375240Snordmark 	udp_t	*udp = (udp_t *)&itc[1];
20385240Snordmark 
20395240Snordmark 	bzero(connp, sizeof (conn_t));
20405240Snordmark 	bzero(udp, sizeof (udp_t));
20415240Snordmark 
20425240Snordmark 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
20435240Snordmark 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
20445240Snordmark 	connp->conn_udp = udp;
20455240Snordmark 	connp->conn_flags = IPCL_UDPCONN;
204611042SErik.Nordmark@Sun.COM 	connp->conn_proto = IPPROTO_UDP;
20475240Snordmark 	udp->udp_connp = connp;
204811042SErik.Nordmark@Sun.COM 	rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
204911042SErik.Nordmark@Sun.COM 	connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
205011042SErik.Nordmark@Sun.COM 	if (connp->conn_ixa == NULL)
205111042SErik.Nordmark@Sun.COM 		return (ENOMEM);
205211042SErik.Nordmark@Sun.COM 	connp->conn_ixa->ixa_refcnt = 1;
205311042SErik.Nordmark@Sun.COM 	connp->conn_ixa->ixa_protocol = connp->conn_proto;
205411042SErik.Nordmark@Sun.COM 	connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
20555240Snordmark 	return (0);
20565240Snordmark }
20575240Snordmark 
20585240Snordmark /* ARGSUSED */
20595240Snordmark static void
20605240Snordmark udp_conn_destructor(void *buf, void *cdrarg)
20615240Snordmark {
20625240Snordmark 	itc_t	*itc = (itc_t *)buf;
20635240Snordmark 	conn_t 	*connp = &itc->itc_conn;
20645240Snordmark 	udp_t	*udp = (udp_t *)&itc[1];
20655240Snordmark 
20665240Snordmark 	ASSERT(connp->conn_flags & IPCL_UDPCONN);
20675240Snordmark 	ASSERT(udp->udp_connp == connp);
20685240Snordmark 	ASSERT(connp->conn_udp == udp);
20695240Snordmark 	mutex_destroy(&connp->conn_lock);
20705240Snordmark 	cv_destroy(&connp->conn_cv);
207111042SErik.Nordmark@Sun.COM 	rw_destroy(&connp->conn_ilg_lock);
207211042SErik.Nordmark@Sun.COM 
207311042SErik.Nordmark@Sun.COM 	/* Can be NULL if constructor failed */
207411042SErik.Nordmark@Sun.COM 	if (connp->conn_ixa != NULL) {
207511042SErik.Nordmark@Sun.COM 		ASSERT(connp->conn_ixa->ixa_refcnt == 1);
207611042SErik.Nordmark@Sun.COM 		ASSERT(connp->conn_ixa->ixa_ire == NULL);
207711042SErik.Nordmark@Sun.COM 		ASSERT(connp->conn_ixa->ixa_nce == NULL);
207811042SErik.Nordmark@Sun.COM 		ixa_refrele(connp->conn_ixa);
207911042SErik.Nordmark@Sun.COM 	}
20805240Snordmark }
20815240Snordmark 
20825240Snordmark /* ARGSUSED */
20835240Snordmark static int
20845240Snordmark rawip_conn_constructor(void *buf, void *cdrarg, int kmflags)
20850Sstevel@tonic-gate {
20865240Snordmark 	itc_t	*itc = (itc_t *)buf;
20875240Snordmark 	conn_t 	*connp = &itc->itc_conn;
20885240Snordmark 	icmp_t	*icmp = (icmp_t *)&itc[1];
20895240Snordmark 
20905240Snordmark 	bzero(connp, sizeof (conn_t));
20915240Snordmark 	bzero(icmp, sizeof (icmp_t));
20925240Snordmark 
20935240Snordmark 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
20945240Snordmark 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
20955240Snordmark 	connp->conn_icmp = icmp;
20965240Snordmark 	connp->conn_flags = IPCL_RAWIPCONN;
209711042SErik.Nordmark@Sun.COM 	connp->conn_proto = IPPROTO_ICMP;
20985240Snordmark 	icmp->icmp_connp = connp;
209911042SErik.Nordmark@Sun.COM 	rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
210011042SErik.Nordmark@Sun.COM 	connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
210111042SErik.Nordmark@Sun.COM 	if (connp->conn_ixa == NULL)
210211042SErik.Nordmark@Sun.COM 		return (ENOMEM);
210311042SErik.Nordmark@Sun.COM 	connp->conn_ixa->ixa_refcnt = 1;
210411042SErik.Nordmark@Sun.COM 	connp->conn_ixa->ixa_protocol = connp->conn_proto;
210511042SErik.Nordmark@Sun.COM 	connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
21065240Snordmark 	return (0);
21075240Snordmark }
21085240Snordmark 
21095240Snordmark /* ARGSUSED */
21105240Snordmark static void
21115240Snordmark rawip_conn_destructor(void *buf, void *cdrarg)
21125240Snordmark {
21135240Snordmark 	itc_t	*itc = (itc_t *)buf;
21145240Snordmark 	conn_t 	*connp = &itc->itc_conn;
21155240Snordmark 	icmp_t	*icmp = (icmp_t *)&itc[1];
21165240Snordmark 
21175240Snordmark 	ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
21185240Snordmark 	ASSERT(icmp->icmp_connp == connp);
21195240Snordmark 	ASSERT(connp->conn_icmp == icmp);
21205240Snordmark 	mutex_destroy(&connp->conn_lock);
21215240Snordmark 	cv_destroy(&connp->conn_cv);
212211042SErik.Nordmark@Sun.COM 	rw_destroy(&connp->conn_ilg_lock);
212311042SErik.Nordmark@Sun.COM 
212411042SErik.Nordmark@Sun.COM 	/* Can be NULL if constructor failed */
212511042SErik.Nordmark@Sun.COM 	if (connp->conn_ixa != NULL) {
212611042SErik.Nordmark@Sun.COM 		ASSERT(connp->conn_ixa->ixa_refcnt == 1);
212711042SErik.Nordmark@Sun.COM 		ASSERT(connp->conn_ixa->ixa_ire == NULL);
212811042SErik.Nordmark@Sun.COM 		ASSERT(connp->conn_ixa->ixa_nce == NULL);
212911042SErik.Nordmark@Sun.COM 		ixa_refrele(connp->conn_ixa);
213011042SErik.Nordmark@Sun.COM 	}
21315240Snordmark }
21325240Snordmark 
21335240Snordmark /* ARGSUSED */
21345240Snordmark static int
21355240Snordmark rts_conn_constructor(void *buf, void *cdrarg, int kmflags)
21365240Snordmark {
21375240Snordmark 	itc_t	*itc = (itc_t *)buf;
21385240Snordmark 	conn_t 	*connp = &itc->itc_conn;
21395240Snordmark 	rts_t	*rts = (rts_t *)&itc[1];
21405240Snordmark 
21415240Snordmark 	bzero(connp, sizeof (conn_t));
21425240Snordmark 	bzero(rts, sizeof (rts_t));
21435240Snordmark 
21445240Snordmark 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
21455240Snordmark 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
21465240Snordmark 	connp->conn_rts = rts;
21475240Snordmark 	connp->conn_flags = IPCL_RTSCONN;
21485240Snordmark 	rts->rts_connp = connp;
214911042SErik.Nordmark@Sun.COM 	rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
215011042SErik.Nordmark@Sun.COM 	connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
215111042SErik.Nordmark@Sun.COM 	if (connp->conn_ixa == NULL)
215211042SErik.Nordmark@Sun.COM 		return (ENOMEM);
215311042SErik.Nordmark@Sun.COM 	connp->conn_ixa->ixa_refcnt = 1;
215411042SErik.Nordmark@Sun.COM 	connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
21555240Snordmark 	return (0);
21565240Snordmark }
21575240Snordmark 
21585240Snordmark /* ARGSUSED */
21595240Snordmark static void
21605240Snordmark rts_conn_destructor(void *buf, void *cdrarg)
21615240Snordmark {
21625240Snordmark 	itc_t	*itc = (itc_t *)buf;
21635240Snordmark 	conn_t 	*connp = &itc->itc_conn;
21645240Snordmark 	rts_t	*rts = (rts_t *)&itc[1];
21655240Snordmark 
21665240Snordmark 	ASSERT(connp->conn_flags & IPCL_RTSCONN);
21675240Snordmark 	ASSERT(rts->rts_connp == connp);
21685240Snordmark 	ASSERT(connp->conn_rts == rts);
21695240Snordmark 	mutex_destroy(&connp->conn_lock);
21705240Snordmark 	cv_destroy(&connp->conn_cv);
217111042SErik.Nordmark@Sun.COM 	rw_destroy(&connp->conn_ilg_lock);
21728444SRao.Shoaib@Sun.COM 
217311042SErik.Nordmark@Sun.COM 	/* Can be NULL if constructor failed */
217411042SErik.Nordmark@Sun.COM 	if (connp->conn_ixa != NULL) {
217511042SErik.Nordmark@Sun.COM 		ASSERT(connp->conn_ixa->ixa_refcnt == 1);
217611042SErik.Nordmark@Sun.COM 		ASSERT(connp->conn_ixa->ixa_ire == NULL);
217711042SErik.Nordmark@Sun.COM 		ASSERT(connp->conn_ixa->ixa_nce == NULL);
217811042SErik.Nordmark@Sun.COM 		ixa_refrele(connp->conn_ixa);
21798348SEric.Yu@Sun.COM 	}
21808348SEric.Yu@Sun.COM }
21818348SEric.Yu@Sun.COM 
21825240Snordmark /*
21835240Snordmark  * Called as part of ipcl_conn_destroy to assert and clear any pointers
21845240Snordmark  * in the conn_t.
218511042SErik.Nordmark@Sun.COM  *
218611042SErik.Nordmark@Sun.COM  * Below we list all the pointers in the conn_t as a documentation aid.
218711042SErik.Nordmark@Sun.COM  * The ones that we can not ASSERT to be NULL are #ifdef'ed out.
218811042SErik.Nordmark@Sun.COM  * If you add any pointers to the conn_t please add an ASSERT here
218911042SErik.Nordmark@Sun.COM  * and #ifdef it out if it can't be actually asserted to be NULL.
219011042SErik.Nordmark@Sun.COM  * In any case, we bzero most of the conn_t at the end of the function.
21915240Snordmark  */
21925240Snordmark void
21935240Snordmark ipcl_conn_cleanup(conn_t *connp)
21945240Snordmark {
219511042SErik.Nordmark@Sun.COM 	ip_xmit_attr_t	*ixa;
219611042SErik.Nordmark@Sun.COM 
21975240Snordmark 	ASSERT(connp->conn_latch == NULL);
219811042SErik.Nordmark@Sun.COM 	ASSERT(connp->conn_latch_in_policy == NULL);
219911042SErik.Nordmark@Sun.COM 	ASSERT(connp->conn_latch_in_action == NULL);
22005240Snordmark #ifdef notdef
22015240Snordmark 	ASSERT(connp->conn_rq == NULL);
22025240Snordmark 	ASSERT(connp->conn_wq == NULL);
22035240Snordmark #endif
22045240Snordmark 	ASSERT(connp->conn_cred == NULL);
22055240Snordmark 	ASSERT(connp->conn_g_fanout == NULL);
22065240Snordmark 	ASSERT(connp->conn_g_next == NULL);
22075240Snordmark 	ASSERT(connp->conn_g_prev == NULL);
22085240Snordmark 	ASSERT(connp->conn_policy == NULL);
22095240Snordmark 	ASSERT(connp->conn_fanout == NULL);
22105240Snordmark 	ASSERT(connp->conn_next == NULL);
22115240Snordmark 	ASSERT(connp->conn_prev == NULL);
22125240Snordmark 	ASSERT(connp->conn_oper_pending_ill == NULL);
22135240Snordmark 	ASSERT(connp->conn_ilg == NULL);
22145240Snordmark 	ASSERT(connp->conn_drain_next == NULL);
22155240Snordmark 	ASSERT(connp->conn_drain_prev == NULL);
22165277Snordmark #ifdef notdef
22175277Snordmark 	/* conn_idl is not cleared when removed from idl list */
22185240Snordmark 	ASSERT(connp->conn_idl == NULL);
22195277Snordmark #endif
22205240Snordmark 	ASSERT(connp->conn_ipsec_opt_mp == NULL);
222111042SErik.Nordmark@Sun.COM #ifdef notdef
222211042SErik.Nordmark@Sun.COM 	/* conn_netstack is cleared by the caller; needed by ixa_cleanup */
22235240Snordmark 	ASSERT(connp->conn_netstack == NULL);
222411042SErik.Nordmark@Sun.COM #endif
22255240Snordmark 
22268348SEric.Yu@Sun.COM 	ASSERT(connp->conn_helper_info == NULL);
222711042SErik.Nordmark@Sun.COM 	ASSERT(connp->conn_ixa != NULL);
222811042SErik.Nordmark@Sun.COM 	ixa = connp->conn_ixa;
222911042SErik.Nordmark@Sun.COM 	ASSERT(ixa->ixa_refcnt == 1);
223011042SErik.Nordmark@Sun.COM 	/* Need to preserve ixa_protocol */
223111042SErik.Nordmark@Sun.COM 	ixa_cleanup(ixa);
223211042SErik.Nordmark@Sun.COM 	ixa->ixa_flags = 0;
223311042SErik.Nordmark@Sun.COM 
22345240Snordmark 	/* Clear out the conn_t fields that are not preserved */
22355240Snordmark 	bzero(&connp->conn_start_clr,
22365240Snordmark 	    sizeof (conn_t) -
22375240Snordmark 	    ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp));
22380Sstevel@tonic-gate }
22390Sstevel@tonic-gate 
22400Sstevel@tonic-gate /*
22410Sstevel@tonic-gate  * All conns are inserted in a global multi-list for the benefit of
22420Sstevel@tonic-gate  * walkers. The walk is guaranteed to walk all open conns at the time
22430Sstevel@tonic-gate  * of the start of the walk exactly once. This property is needed to
22440Sstevel@tonic-gate  * achieve some cleanups during unplumb of interfaces. This is achieved
22450Sstevel@tonic-gate  * as follows.
22460Sstevel@tonic-gate  *
22470Sstevel@tonic-gate  * ipcl_conn_create and ipcl_conn_destroy are the only functions that
22480Sstevel@tonic-gate  * call the insert and delete functions below at creation and deletion
22490Sstevel@tonic-gate  * time respectively. The conn never moves or changes its position in this
22500Sstevel@tonic-gate  * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt
22510Sstevel@tonic-gate  * won't increase due to walkers, once the conn deletion has started. Note
22520Sstevel@tonic-gate  * that we can't remove the conn from the global list and then wait for
22530Sstevel@tonic-gate  * the refcnt to drop to zero, since walkers would then see a truncated
22540Sstevel@tonic-gate  * list. CONN_INCIPIENT ensures that walkers don't start looking at
22550Sstevel@tonic-gate  * conns until ip_open is ready to make them globally visible.
22560Sstevel@tonic-gate  * The global round robin multi-list locks are held only to get the
22570Sstevel@tonic-gate  * next member/insertion/deletion and contention should be negligible
22580Sstevel@tonic-gate  * if the multi-list is much greater than the number of cpus.
22590Sstevel@tonic-gate  */
22600Sstevel@tonic-gate void
22610Sstevel@tonic-gate ipcl_globalhash_insert(conn_t *connp)
22620Sstevel@tonic-gate {
22630Sstevel@tonic-gate 	int	index;
22643448Sdh155122 	struct connf_s	*connfp;
22653448Sdh155122 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
22660Sstevel@tonic-gate 
22670Sstevel@tonic-gate 	/*
22680Sstevel@tonic-gate 	 * No need for atomic here. Approximate even distribution
22690Sstevel@tonic-gate 	 * in the global lists is sufficient.
22700Sstevel@tonic-gate 	 */
22713448Sdh155122 	ipst->ips_conn_g_index++;
22723448Sdh155122 	index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1);
22730Sstevel@tonic-gate 
22740Sstevel@tonic-gate 	connp->conn_g_prev = NULL;
22750Sstevel@tonic-gate 	/*
22760Sstevel@tonic-gate 	 * Mark as INCIPIENT, so that walkers will ignore this
22770Sstevel@tonic-gate 	 * for now, till ip_open is ready to make it visible globally.
22780Sstevel@tonic-gate 	 */
22790Sstevel@tonic-gate 	connp->conn_state_flags |= CONN_INCIPIENT;
22800Sstevel@tonic-gate 
22813448Sdh155122 	connfp = &ipst->ips_ipcl_globalhash_fanout[index];
22820Sstevel@tonic-gate 	/* Insert at the head of the list */
22833448Sdh155122 	mutex_enter(&connfp->connf_lock);
22843448Sdh155122 	connp->conn_g_next = connfp->connf_head;
22850Sstevel@tonic-gate 	if (connp->conn_g_next != NULL)
22860Sstevel@tonic-gate 		connp->conn_g_next->conn_g_prev = connp;
22873448Sdh155122 	connfp->connf_head = connp;
22880Sstevel@tonic-gate 
22890Sstevel@tonic-gate 	/* The fanout bucket this conn points to */
22903448Sdh155122 	connp->conn_g_fanout = connfp;
22910Sstevel@tonic-gate 
22923448Sdh155122 	mutex_exit(&connfp->connf_lock);
22930Sstevel@tonic-gate }
22940Sstevel@tonic-gate 
22950Sstevel@tonic-gate void
22960Sstevel@tonic-gate ipcl_globalhash_remove(conn_t *connp)
22970Sstevel@tonic-gate {
22983448Sdh155122 	struct connf_s	*connfp;
22993448Sdh155122 
23000Sstevel@tonic-gate 	/*
23010Sstevel@tonic-gate 	 * We were never inserted in the global multi list.
23020Sstevel@tonic-gate 	 * IPCL_NONE variety is never inserted in the global multilist
23030Sstevel@tonic-gate 	 * since it is presumed to not need any cleanup and is transient.
23040Sstevel@tonic-gate 	 */
23050Sstevel@tonic-gate 	if (connp->conn_g_fanout == NULL)
23060Sstevel@tonic-gate 		return;
23070Sstevel@tonic-gate 
23083448Sdh155122 	connfp = connp->conn_g_fanout;
23093448Sdh155122 	mutex_enter(&connfp->connf_lock);
23100Sstevel@tonic-gate 	if (connp->conn_g_prev != NULL)
23110Sstevel@tonic-gate 		connp->conn_g_prev->conn_g_next = connp->conn_g_next;
23120Sstevel@tonic-gate 	else
23133448Sdh155122 		connfp->connf_head = connp->conn_g_next;
23140Sstevel@tonic-gate 	if (connp->conn_g_next != NULL)
23150Sstevel@tonic-gate 		connp->conn_g_next->conn_g_prev = connp->conn_g_prev;
23163448Sdh155122 	mutex_exit(&connfp->connf_lock);
23170Sstevel@tonic-gate 
23180Sstevel@tonic-gate 	/* Better to stumble on a null pointer than to corrupt memory */
23190Sstevel@tonic-gate 	connp->conn_g_next = NULL;
23200Sstevel@tonic-gate 	connp->conn_g_prev = NULL;
23215240Snordmark 	connp->conn_g_fanout = NULL;
23220Sstevel@tonic-gate }
23230Sstevel@tonic-gate 
23240Sstevel@tonic-gate /*
23250Sstevel@tonic-gate  * Walk the list of all conn_t's in the system, calling the function provided
232611042SErik.Nordmark@Sun.COM  * With the specified argument for each.
23270Sstevel@tonic-gate  * Applies to both IPv4 and IPv6.
23280Sstevel@tonic-gate  *
232911042SErik.Nordmark@Sun.COM  * CONNs may hold pointers to ills (conn_dhcpinit_ill and
233011042SErik.Nordmark@Sun.COM  * conn_oper_pending_ill). To guard against stale pointers
23310Sstevel@tonic-gate  * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is
23320Sstevel@tonic-gate  * unplumbed or removed. New conn_t's that are created while we are walking
23330Sstevel@tonic-gate  * may be missed by this walk, because they are not necessarily inserted
23340Sstevel@tonic-gate  * at the tail of the list. They are new conn_t's and thus don't have any
23350Sstevel@tonic-gate  * stale pointers. The CONN_CLOSING flag ensures that no new reference
23360Sstevel@tonic-gate  * is created to the struct that is going away.
23370Sstevel@tonic-gate  */
23380Sstevel@tonic-gate void
23393448Sdh155122 ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst)
23400Sstevel@tonic-gate {
23410Sstevel@tonic-gate 	int	i;
23420Sstevel@tonic-gate 	conn_t	*connp;
23430Sstevel@tonic-gate 	conn_t	*prev_connp;
23440Sstevel@tonic-gate 
23450Sstevel@tonic-gate 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
23463448Sdh155122 		mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
23470Sstevel@tonic-gate 		prev_connp = NULL;
23483448Sdh155122 		connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head;
23490Sstevel@tonic-gate 		while (connp != NULL) {
23500Sstevel@tonic-gate 			mutex_enter(&connp->conn_lock);
23510Sstevel@tonic-gate 			if (connp->conn_state_flags &
23520Sstevel@tonic-gate 			    (CONN_CONDEMNED | CONN_INCIPIENT)) {
23530Sstevel@tonic-gate 				mutex_exit(&connp->conn_lock);
23540Sstevel@tonic-gate 				connp = connp->conn_g_next;
23550Sstevel@tonic-gate 				continue;
23560Sstevel@tonic-gate 			}
23570Sstevel@tonic-gate 			CONN_INC_REF_LOCKED(connp);
23580Sstevel@tonic-gate 			mutex_exit(&connp->conn_lock);
23593448Sdh155122 			mutex_exit(
23603448Sdh155122 			    &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
23610Sstevel@tonic-gate 			(*func)(connp, arg);
23620Sstevel@tonic-gate 			if (prev_connp != NULL)
23630Sstevel@tonic-gate 				CONN_DEC_REF(prev_connp);
23643448Sdh155122 			mutex_enter(
23653448Sdh155122 			    &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
23660Sstevel@tonic-gate 			prev_connp = connp;
23670Sstevel@tonic-gate 			connp = connp->conn_g_next;
23680Sstevel@tonic-gate 		}
23693448Sdh155122 		mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
23700Sstevel@tonic-gate 		if (prev_connp != NULL)
23710Sstevel@tonic-gate 			CONN_DEC_REF(prev_connp);
23720Sstevel@tonic-gate 	}
23730Sstevel@tonic-gate }
23740Sstevel@tonic-gate 
23750Sstevel@tonic-gate /*
23760Sstevel@tonic-gate  * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on
23770Sstevel@tonic-gate  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
23780Sstevel@tonic-gate  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
23792323Sethindra  * (peer tcp in ESTABLISHED state).
23800Sstevel@tonic-gate  */
23810Sstevel@tonic-gate conn_t *
238211042SErik.Nordmark@Sun.COM ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcpha_t *tcpha,
23833448Sdh155122     ip_stack_t *ipst)
23840Sstevel@tonic-gate {
23850Sstevel@tonic-gate 	uint32_t ports;
23860Sstevel@tonic-gate 	uint16_t *pports = (uint16_t *)&ports;
23870Sstevel@tonic-gate 	connf_t	*connfp;
23880Sstevel@tonic-gate 	conn_t	*tconnp;
23890Sstevel@tonic-gate 	boolean_t zone_chk;
23900Sstevel@tonic-gate 
23910Sstevel@tonic-gate 	/*
23920Sstevel@tonic-gate 	 * If either the source of destination address is loopback, then
23930Sstevel@tonic-gate 	 * both endpoints must be in the same Zone.  Otherwise, both of
23940Sstevel@tonic-gate 	 * the addresses are system-wide unique (tcp is in ESTABLISHED
23950Sstevel@tonic-gate 	 * state) and the endpoints may reside in different Zones.
23960Sstevel@tonic-gate 	 */
23970Sstevel@tonic-gate 	zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) ||
23980Sstevel@tonic-gate 	    ipha->ipha_dst == htonl(INADDR_LOOPBACK));
23990Sstevel@tonic-gate 
240011042SErik.Nordmark@Sun.COM 	pports[0] = tcpha->tha_fport;
240111042SErik.Nordmark@Sun.COM 	pports[1] = tcpha->tha_lport;
24020Sstevel@tonic-gate 
24033448Sdh155122 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
24043448Sdh155122 	    ports, ipst)];
24050Sstevel@tonic-gate 
24060Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
24070Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
24080Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
24090Sstevel@tonic-gate 
24100Sstevel@tonic-gate 		if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
24110Sstevel@tonic-gate 		    ipha->ipha_dst, ipha->ipha_src, ports) &&
24122323Sethindra 		    tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
24130Sstevel@tonic-gate 		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
24140Sstevel@tonic-gate 
24150Sstevel@tonic-gate 			ASSERT(tconnp != connp);
24160Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
24170Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
24180Sstevel@tonic-gate 			return (tconnp);
24190Sstevel@tonic-gate 		}
24200Sstevel@tonic-gate 	}
24210Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
24220Sstevel@tonic-gate 	return (NULL);
24230Sstevel@tonic-gate }
24240Sstevel@tonic-gate 
24250Sstevel@tonic-gate /*
24260Sstevel@tonic-gate  * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on
24270Sstevel@tonic-gate  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
24280Sstevel@tonic-gate  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
24292323Sethindra  * (peer tcp in ESTABLISHED state).
24300Sstevel@tonic-gate  */
24310Sstevel@tonic-gate conn_t *
243211042SErik.Nordmark@Sun.COM ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcpha_t *tcpha,
24333448Sdh155122     ip_stack_t *ipst)
24340Sstevel@tonic-gate {
24350Sstevel@tonic-gate 	uint32_t ports;
24360Sstevel@tonic-gate 	uint16_t *pports = (uint16_t *)&ports;
24370Sstevel@tonic-gate 	connf_t	*connfp;
24380Sstevel@tonic-gate 	conn_t	*tconnp;
24390Sstevel@tonic-gate 	boolean_t zone_chk;
24400Sstevel@tonic-gate 
24410Sstevel@tonic-gate 	/*
24420Sstevel@tonic-gate 	 * If either the source of destination address is loopback, then
24430Sstevel@tonic-gate 	 * both endpoints must be in the same Zone.  Otherwise, both of
24440Sstevel@tonic-gate 	 * the addresses are system-wide unique (tcp is in ESTABLISHED
24450Sstevel@tonic-gate 	 * state) and the endpoints may reside in different Zones.  We
24460Sstevel@tonic-gate 	 * don't do Zone check for link local address(es) because the
24470Sstevel@tonic-gate 	 * current Zone implementation treats each link local address as
24480Sstevel@tonic-gate 	 * being unique per system node, i.e. they belong to global Zone.
24490Sstevel@tonic-gate 	 */
24500Sstevel@tonic-gate 	zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) ||
24510Sstevel@tonic-gate 	    IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst));
24520Sstevel@tonic-gate 
245311042SErik.Nordmark@Sun.COM 	pports[0] = tcpha->tha_fport;
245411042SErik.Nordmark@Sun.COM 	pports[1] = tcpha->tha_lport;
24550Sstevel@tonic-gate 
24563448Sdh155122 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
24573448Sdh155122 	    ports, ipst)];
24580Sstevel@tonic-gate 
24590Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
24600Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
24610Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
24620Sstevel@tonic-gate 
246311042SErik.Nordmark@Sun.COM 		/* We skip conn_bound_if check here as this is loopback tcp */
24640Sstevel@tonic-gate 		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
24650Sstevel@tonic-gate 		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
24662323Sethindra 		    tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
24670Sstevel@tonic-gate 		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
24680Sstevel@tonic-gate 
24690Sstevel@tonic-gate 			ASSERT(tconnp != connp);
24700Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
24710Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
24720Sstevel@tonic-gate 			return (tconnp);
24730Sstevel@tonic-gate 		}
24740Sstevel@tonic-gate 	}
24750Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
24760Sstevel@tonic-gate 	return (NULL);
24770Sstevel@tonic-gate }
24780Sstevel@tonic-gate 
24790Sstevel@tonic-gate /*
24800Sstevel@tonic-gate  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
24810Sstevel@tonic-gate  * Returns with conn reference held. Caller must call CONN_DEC_REF.
24820Sstevel@tonic-gate  * Only checks for connected entries i.e. no INADDR_ANY checks.
24830Sstevel@tonic-gate  */
24840Sstevel@tonic-gate conn_t *
248511042SErik.Nordmark@Sun.COM ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcpha_t *tcpha, int min_state,
24863448Sdh155122     ip_stack_t *ipst)
24870Sstevel@tonic-gate {
24880Sstevel@tonic-gate 	uint32_t ports;
24890Sstevel@tonic-gate 	uint16_t *pports;
24900Sstevel@tonic-gate 	connf_t	*connfp;
24910Sstevel@tonic-gate 	conn_t	*tconnp;
24920Sstevel@tonic-gate 
24930Sstevel@tonic-gate 	pports = (uint16_t *)&ports;
249411042SErik.Nordmark@Sun.COM 	pports[0] = tcpha->tha_fport;
249511042SErik.Nordmark@Sun.COM 	pports[1] = tcpha->tha_lport;
24960Sstevel@tonic-gate 
24973448Sdh155122 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
24984691Skcpoon 	    ports, ipst)];
24990Sstevel@tonic-gate 
25000Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
25010Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
25020Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
25030Sstevel@tonic-gate 
25040Sstevel@tonic-gate 		if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
25050Sstevel@tonic-gate 		    ipha->ipha_dst, ipha->ipha_src, ports) &&
25060Sstevel@tonic-gate 		    tconnp->conn_tcp->tcp_state >= min_state) {
25070Sstevel@tonic-gate 
25080Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
25090Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
25100Sstevel@tonic-gate 			return (tconnp);
25110Sstevel@tonic-gate 		}
25120Sstevel@tonic-gate 	}
25130Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
25140Sstevel@tonic-gate 	return (NULL);
25150Sstevel@tonic-gate }
25160Sstevel@tonic-gate 
25170Sstevel@tonic-gate /*
25180Sstevel@tonic-gate  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
25190Sstevel@tonic-gate  * Returns with conn reference held. Caller must call CONN_DEC_REF.
25200Sstevel@tonic-gate  * Only checks for connected entries i.e. no INADDR_ANY checks.
25210Sstevel@tonic-gate  * Match on ifindex in addition to addresses.
25220Sstevel@tonic-gate  */
25230Sstevel@tonic-gate conn_t *
25240Sstevel@tonic-gate ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state,
25253448Sdh155122     uint_t ifindex, ip_stack_t *ipst)
25260Sstevel@tonic-gate {
25270Sstevel@tonic-gate 	tcp_t	*tcp;
25280Sstevel@tonic-gate 	uint32_t ports;
25290Sstevel@tonic-gate 	uint16_t *pports;
25300Sstevel@tonic-gate 	connf_t	*connfp;
25310Sstevel@tonic-gate 	conn_t	*tconnp;
25320Sstevel@tonic-gate 
25330Sstevel@tonic-gate 	pports = (uint16_t *)&ports;
25340Sstevel@tonic-gate 	pports[0] = tcpha->tha_fport;
25350Sstevel@tonic-gate 	pports[1] = tcpha->tha_lport;
25360Sstevel@tonic-gate 
25373448Sdh155122 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
25384691Skcpoon 	    ports, ipst)];
25390Sstevel@tonic-gate 
25400Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
25410Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
25420Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
25430Sstevel@tonic-gate 
25440Sstevel@tonic-gate 		tcp = tconnp->conn_tcp;
25450Sstevel@tonic-gate 		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
25460Sstevel@tonic-gate 		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
25470Sstevel@tonic-gate 		    tcp->tcp_state >= min_state &&
254811042SErik.Nordmark@Sun.COM 		    (tconnp->conn_bound_if == 0 ||
254911042SErik.Nordmark@Sun.COM 		    tconnp->conn_bound_if == ifindex)) {
25500Sstevel@tonic-gate 
25510Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
25520Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
25530Sstevel@tonic-gate 			return (tconnp);
25540Sstevel@tonic-gate 		}
25550Sstevel@tonic-gate 	}
25560Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
25570Sstevel@tonic-gate 	return (NULL);
25580Sstevel@tonic-gate }
25590Sstevel@tonic-gate 
25600Sstevel@tonic-gate /*
25611676Sjpk  * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate
25621676Sjpk  * a listener when changing state.
25630Sstevel@tonic-gate  */
25640Sstevel@tonic-gate conn_t *
25653448Sdh155122 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid,
25663448Sdh155122     ip_stack_t *ipst)
25670Sstevel@tonic-gate {
25680Sstevel@tonic-gate 	connf_t		*bind_connfp;
25690Sstevel@tonic-gate 	conn_t		*connp;
25700Sstevel@tonic-gate 	tcp_t		*tcp;
25710Sstevel@tonic-gate 
25720Sstevel@tonic-gate 	/*
25730Sstevel@tonic-gate 	 * Avoid false matches for packets sent to an IP destination of
25740Sstevel@tonic-gate 	 * all zeros.
25750Sstevel@tonic-gate 	 */
25760Sstevel@tonic-gate 	if (laddr == 0)
25770Sstevel@tonic-gate 		return (NULL);
25780Sstevel@tonic-gate 
25791676Sjpk 	ASSERT(zoneid != ALL_ZONES);
25801676Sjpk 
25813448Sdh155122 	bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
25820Sstevel@tonic-gate 	mutex_enter(&bind_connfp->connf_lock);
25830Sstevel@tonic-gate 	for (connp = bind_connfp->connf_head; connp != NULL;
25840Sstevel@tonic-gate 	    connp = connp->conn_next) {
25850Sstevel@tonic-gate 		tcp = connp->conn_tcp;
25860Sstevel@tonic-gate 		if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) &&
25872263Ssommerfe 		    IPCL_ZONE_MATCH(connp, zoneid) &&
25880Sstevel@tonic-gate 		    (tcp->tcp_listener == NULL)) {
25890Sstevel@tonic-gate 			CONN_INC_REF(connp);
25900Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
25910Sstevel@tonic-gate 			return (connp);
25920Sstevel@tonic-gate 		}
25930Sstevel@tonic-gate 	}
25940Sstevel@tonic-gate 	mutex_exit(&bind_connfp->connf_lock);
25950Sstevel@tonic-gate 	return (NULL);
25960Sstevel@tonic-gate }
25970Sstevel@tonic-gate 
25981676Sjpk /*
25991676Sjpk  * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate
26001676Sjpk  * a listener when changing state.
26011676Sjpk  */
26020Sstevel@tonic-gate conn_t *
26030Sstevel@tonic-gate ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex,
26043448Sdh155122     zoneid_t zoneid, ip_stack_t *ipst)
26050Sstevel@tonic-gate {
26060Sstevel@tonic-gate 	connf_t		*bind_connfp;
26070Sstevel@tonic-gate 	conn_t		*connp = NULL;
26080Sstevel@tonic-gate 	tcp_t		*tcp;
26090Sstevel@tonic-gate 
26100Sstevel@tonic-gate 	/*
26110Sstevel@tonic-gate 	 * Avoid false matches for packets sent to an IP destination of
26120Sstevel@tonic-gate 	 * all zeros.
26130Sstevel@tonic-gate 	 */
26140Sstevel@tonic-gate 	if (IN6_IS_ADDR_UNSPECIFIED(laddr))
26150Sstevel@tonic-gate 		return (NULL);
26160Sstevel@tonic-gate 
26171676Sjpk 	ASSERT(zoneid != ALL_ZONES);
26180Sstevel@tonic-gate 
26193448Sdh155122 	bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
26200Sstevel@tonic-gate 	mutex_enter(&bind_connfp->connf_lock);
26210Sstevel@tonic-gate 	for (connp = bind_connfp->connf_head; connp != NULL;
26220Sstevel@tonic-gate 	    connp = connp->conn_next) {
26230Sstevel@tonic-gate 		tcp = connp->conn_tcp;
26240Sstevel@tonic-gate 		if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) &&
26252263Ssommerfe 		    IPCL_ZONE_MATCH(connp, zoneid) &&
262611042SErik.Nordmark@Sun.COM 		    (connp->conn_bound_if == 0 ||
262711042SErik.Nordmark@Sun.COM 		    connp->conn_bound_if == ifindex) &&
26280Sstevel@tonic-gate 		    tcp->tcp_listener == NULL) {
26290Sstevel@tonic-gate 			CONN_INC_REF(connp);
26300Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
26310Sstevel@tonic-gate 			return (connp);
26320Sstevel@tonic-gate 		}
26330Sstevel@tonic-gate 	}
26340Sstevel@tonic-gate 	mutex_exit(&bind_connfp->connf_lock);
26350Sstevel@tonic-gate 	return (NULL);
26360Sstevel@tonic-gate }
26370Sstevel@tonic-gate 
2638741Smasputra /*
2639741Smasputra  * ipcl_get_next_conn
2640741Smasputra  *	get the next entry in the conn global list
2641741Smasputra  *	and put a reference on the next_conn.
2642741Smasputra  *	decrement the reference on the current conn.
2643741Smasputra  *
2644741Smasputra  * This is an iterator based walker function that also provides for
2645741Smasputra  * some selection by the caller. It walks through the conn_hash bucket
2646741Smasputra  * searching for the next valid connp in the list, and selects connections
2647741Smasputra  * that are neither closed nor condemned. It also REFHOLDS the conn
2648741Smasputra  * thus ensuring that the conn exists when the caller uses the conn.
2649741Smasputra  */
2650741Smasputra conn_t *
2651741Smasputra ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags)
2652741Smasputra {
2653741Smasputra 	conn_t	*next_connp;
2654741Smasputra 
2655741Smasputra 	if (connfp == NULL)
2656741Smasputra 		return (NULL);
2657741Smasputra 
2658741Smasputra 	mutex_enter(&connfp->connf_lock);
2659741Smasputra 
2660741Smasputra 	next_connp = (connp == NULL) ?
2661741Smasputra 	    connfp->connf_head : connp->conn_g_next;
2662741Smasputra 
2663741Smasputra 	while (next_connp != NULL) {
2664741Smasputra 		mutex_enter(&next_connp->conn_lock);
2665741Smasputra 		if (!(next_connp->conn_flags & conn_flags) ||
2666741Smasputra 		    (next_connp->conn_state_flags &
2667741Smasputra 		    (CONN_CONDEMNED | CONN_INCIPIENT))) {
2668741Smasputra 			/*
2669741Smasputra 			 * This conn has been condemned or
2670741Smasputra 			 * is closing, or the flags don't match
2671741Smasputra 			 */
2672741Smasputra 			mutex_exit(&next_connp->conn_lock);
2673741Smasputra 			next_connp = next_connp->conn_g_next;
2674741Smasputra 			continue;
2675741Smasputra 		}
2676741Smasputra 		CONN_INC_REF_LOCKED(next_connp);
2677741Smasputra 		mutex_exit(&next_connp->conn_lock);
2678741Smasputra 		break;
2679741Smasputra 	}
2680741Smasputra 
2681741Smasputra 	mutex_exit(&connfp->connf_lock);
2682741Smasputra 
2683741Smasputra 	if (connp != NULL)
2684741Smasputra 		CONN_DEC_REF(connp);
2685741Smasputra 
2686741Smasputra 	return (next_connp);
2687741Smasputra }
2688741Smasputra 
26890Sstevel@tonic-gate #ifdef CONN_DEBUG
26900Sstevel@tonic-gate /*
26910Sstevel@tonic-gate  * Trace of the last NBUF refhold/refrele
26920Sstevel@tonic-gate  */
26930Sstevel@tonic-gate int
26940Sstevel@tonic-gate conn_trace_ref(conn_t *connp)
26950Sstevel@tonic-gate {
26960Sstevel@tonic-gate 	int	last;
26970Sstevel@tonic-gate 	conn_trace_t	*ctb;
26980Sstevel@tonic-gate 
26990Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connp->conn_lock));
27000Sstevel@tonic-gate 	last = connp->conn_trace_last;
27010Sstevel@tonic-gate 	last++;
27020Sstevel@tonic-gate 	if (last == CONN_TRACE_MAX)
27030Sstevel@tonic-gate 		last = 0;
27040Sstevel@tonic-gate 
27050Sstevel@tonic-gate 	ctb = &connp->conn_trace_buf[last];
27065023Scarlsonj 	ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
27070Sstevel@tonic-gate 	connp->conn_trace_last = last;
27080Sstevel@tonic-gate 	return (1);
27090Sstevel@tonic-gate }
27100Sstevel@tonic-gate 
27110Sstevel@tonic-gate int
27120Sstevel@tonic-gate conn_untrace_ref(conn_t *connp)
27130Sstevel@tonic-gate {
27140Sstevel@tonic-gate 	int	last;
27150Sstevel@tonic-gate 	conn_trace_t	*ctb;
27160Sstevel@tonic-gate 
27170Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connp->conn_lock));
27180Sstevel@tonic-gate 	last = connp->conn_trace_last;
27190Sstevel@tonic-gate 	last++;
27200Sstevel@tonic-gate 	if (last == CONN_TRACE_MAX)
27210Sstevel@tonic-gate 		last = 0;
27220Sstevel@tonic-gate 
27230Sstevel@tonic-gate 	ctb = &connp->conn_trace_buf[last];
27245023Scarlsonj 	ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
27250Sstevel@tonic-gate 	connp->conn_trace_last = last;
27260Sstevel@tonic-gate 	return (1);
27270Sstevel@tonic-gate }
27280Sstevel@tonic-gate #endif
2729