10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 51503Sericheng * Common Development and Distribution License (the "License"). 61503Sericheng * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 227502Saruna@cs.umn.edu * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate 260Sstevel@tonic-gate /* 270Sstevel@tonic-gate * IP PACKET CLASSIFIER 280Sstevel@tonic-gate * 290Sstevel@tonic-gate * The IP packet classifier provides mapping between IP packets and persistent 300Sstevel@tonic-gate * connection state for connection-oriented protocols. It also provides 310Sstevel@tonic-gate * interface for managing connection states. 
320Sstevel@tonic-gate * 330Sstevel@tonic-gate * The connection state is kept in conn_t data structure and contains, among 340Sstevel@tonic-gate * other things: 350Sstevel@tonic-gate * 360Sstevel@tonic-gate * o local/remote address and ports 370Sstevel@tonic-gate * o Transport protocol 380Sstevel@tonic-gate * o squeue for the connection (for TCP only) 390Sstevel@tonic-gate * o reference counter 400Sstevel@tonic-gate * o Connection state 410Sstevel@tonic-gate * o hash table linkage 420Sstevel@tonic-gate * o interface/ire information 430Sstevel@tonic-gate * o credentials 440Sstevel@tonic-gate * o ipsec policy 450Sstevel@tonic-gate * o send and receive functions. 460Sstevel@tonic-gate * o mutex lock. 470Sstevel@tonic-gate * 480Sstevel@tonic-gate * Connections use a reference counting scheme. They are freed when the 490Sstevel@tonic-gate * reference counter drops to zero. A reference is incremented when connection 500Sstevel@tonic-gate * is placed in a list or table, when incoming packet for the connection arrives 510Sstevel@tonic-gate * and when connection is processed via squeue (squeue processing may be 520Sstevel@tonic-gate * asynchronous and the reference protects the connection from being destroyed 530Sstevel@tonic-gate * before its processing is finished). 540Sstevel@tonic-gate * 550Sstevel@tonic-gate * send and receive functions are currently used for TCP only. The send function 560Sstevel@tonic-gate * determines the IP entry point for the packet once it leaves TCP to be sent to 570Sstevel@tonic-gate * the destination address. The receive function is used by IP when the packet 580Sstevel@tonic-gate * should be passed for TCP processing. When a new connection is created these 590Sstevel@tonic-gate * are set to ip_output() and tcp_input() respectively. During the lifetime of 600Sstevel@tonic-gate * the connection the send and receive functions may change depending on the 610Sstevel@tonic-gate * changes in the connection state. 
 * For example, once the connection is bound to
 * an address, the receive function for this connection is set to
 * tcp_conn_request(). This allows incoming SYNs to go directly into the
 * listener SYN processing function without going to tcp_input() first.
 *
 * Classifier uses several hash tables:
 *
 *	ipcl_conn_fanout:	contains all TCP connections in CONNECTED state
 *	ipcl_bind_fanout:	contains all connections in BOUND state
 *	ipcl_proto_fanout:	IPv4 protocol fanout
 *	ipcl_proto_fanout_v6:	IPv6 protocol fanout
 *	ipcl_udp_fanout:	contains all UDP connections
 *	ipcl_globalhash_fanout:	contains all connections
 *
 * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
 * which need to view all existing connections.
 *
 * All tables are protected by per-bucket locks. When both per-bucket lock and
 * connection lock need to be held, the per-bucket lock should be acquired
 * first, followed by the connection lock.
 *
 * All functions doing search in one of these tables increment a reference
 * counter on the connection found (if any). This reference should be dropped
 * when the caller has finished processing the connection.
850Sstevel@tonic-gate * 860Sstevel@tonic-gate * 870Sstevel@tonic-gate * INTERFACES: 880Sstevel@tonic-gate * =========== 890Sstevel@tonic-gate * 900Sstevel@tonic-gate * Connection Lookup: 910Sstevel@tonic-gate * ------------------ 920Sstevel@tonic-gate * 933448Sdh155122 * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, zoneid, ip_stack) 943448Sdh155122 * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, zoneid, ip_stack) 950Sstevel@tonic-gate * 960Sstevel@tonic-gate * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if 970Sstevel@tonic-gate * it can't find any associated connection. If the connection is found, its 980Sstevel@tonic-gate * reference counter is incremented. 990Sstevel@tonic-gate * 1000Sstevel@tonic-gate * mp: mblock, containing packet header. The full header should fit 1010Sstevel@tonic-gate * into a single mblock. It should also contain at least full IP 1020Sstevel@tonic-gate * and TCP or UDP header. 1030Sstevel@tonic-gate * 1040Sstevel@tonic-gate * protocol: Either IPPROTO_TCP or IPPROTO_UDP. 1050Sstevel@tonic-gate * 1060Sstevel@tonic-gate * hdr_len: The size of IP header. It is used to find TCP or UDP header in 1070Sstevel@tonic-gate * the packet. 1080Sstevel@tonic-gate * 1091676Sjpk * zoneid: The zone in which the returned connection must be; the zoneid 1101676Sjpk * corresponding to the ire_zoneid on the IRE located for the 1111676Sjpk * packet's destination address. 1120Sstevel@tonic-gate * 1130Sstevel@tonic-gate * For TCP connections, the lookup order is as follows: 1140Sstevel@tonic-gate * 5-tuple {src, dst, protocol, local port, remote port} 1150Sstevel@tonic-gate * lookup in ipcl_conn_fanout table. 1160Sstevel@tonic-gate * 3-tuple {dst, remote port, protocol} lookup in 1170Sstevel@tonic-gate * ipcl_bind_fanout table. 1180Sstevel@tonic-gate * 1190Sstevel@tonic-gate * For UDP connections, a 5-tuple {src, dst, protocol, local port, 1200Sstevel@tonic-gate * remote port} lookup is done on ipcl_udp_fanout. 
 *	Note that,
 *	these interfaces do not handle cases where a packet belongs
 *	to multiple UDP clients, which is handled in IP itself.
 *
 * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
 * determine which actual zone gets the segment. This is used only in a
 * labeled environment. The matching rules are:
 *
 *	- If it's not a multilevel port, then the label on the packet selects
 *	  the zone. Unlabeled packets are delivered to the global zone.
 *
 *	- If it's a multilevel port, then only the zone registered to receive
 *	  packets on that port matches.
 *
 * Also, in a labeled environment, packet labels need to be checked. For fully
 * bound TCP connections, we can assume that the packet label was checked
 * during connection establishment, and doesn't need to be checked on each
 * packet. For others, though, we need to check for strict equality or, for
 * multilevel ports, membership in the range or set. This part currently does
 * a tnrh lookup on each packet, but could be optimized to use cached results
 * if that were necessary. (SCTP doesn't come through here, but if it did,
 * we would apply the same rules as TCP.)
 *
 * An implication of the above is that fully-bound TCP sockets must always use
 * distinct 4-tuples; they can't be discriminated by label alone.
 *
 * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
 * as there's no connection set-up handshake and no shared state.
 *
 * Labels on looped-back packets within a single zone do not need to be
 * checked, as all processes in the same zone have the same label.
 *
 * Finally, for unlabeled packets received by a labeled system, special rules
 * apply. We consider only the MLP if there is one. Otherwise, we prefer a
 * socket in the zone whose label matches the default label of the sender, if
 * any. In any event, the receiving socket must have SO_MAC_EXEMPT set and the
 * receiver's label must dominate the sender's default label.
 *
 * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcph_t *, int, ip_stack);
 * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
 *					 ip_stack);
 *
 *	Lookup routine to find an exact match for {src, dst, local port,
 *	remote port} for TCP connections in ipcl_conn_fanout. The address and
 *	ports are read from the IP and TCP header respectively.
 *
 * conn_t *ipcl_lookup_listener_v4(lport, laddr, protocol,
 *				    zoneid, ip_stack);
 * conn_t *ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
 *				    zoneid, ip_stack);
 *
 *	Lookup routine to find a listener with the tuple {lport, laddr,
 *	protocol} in the ipcl_bind_fanout table. For IPv6, an additional
 *	parameter interface index is also compared.
 *
 * void ipcl_walk(func, arg, ip_stack)
 *
 *	Apply 'func' to every connection available. The 'func' is called as
 *	(*func)(connp, arg). The walk is non-atomic so connections may be
 *	created and destroyed during the walk.
 *	The CONN_CONDEMNED and
 *	CONN_INCIPIENT flags ensure that connections which are newly created
 *	or being destroyed are not selected by the walker.
 *
 * Table Updates
 * -------------
 *
 * int ipcl_conn_insert(connp, protocol, src, dst, ports)
 * int ipcl_conn_insert_v6(connp, protocol, src, dst, ports, ifindex)
 *
 *	Insert 'connp' in the ipcl_conn_fanout.
 *	Arguments :
 *		connp		conn_t to be inserted
 *		protocol	connection protocol
 *		src		source address
 *		dst		destination address
 *		ports		local and remote port
 *		ifindex		interface index for IPv6 connections
 *
 *	Return value :
 *		0		if connp was inserted
 *		EADDRINUSE	if the connection with the same tuple
 *				already exists.
 *
 * int ipcl_bind_insert(connp, protocol, src, lport);
 * int ipcl_bind_insert_v6(connp, protocol, src, lport);
 *
 *	Insert 'connp' in ipcl_bind_fanout.
 *	Arguments :
 *		connp		conn_t to be inserted
 *		protocol	connection protocol
 *		src		source address connection wants
 *				to bind to
 *		lport		local port connection wants to
 *				bind to
 *
 *
 * void ipcl_hash_remove(connp);
 *
 *	Removes the 'connp' from the connection fanout table.
2190Sstevel@tonic-gate * 2200Sstevel@tonic-gate * Connection Creation/Destruction 2210Sstevel@tonic-gate * ------------------------------- 2220Sstevel@tonic-gate * 2233448Sdh155122 * conn_t *ipcl_conn_create(type, sleep, netstack_t *) 2240Sstevel@tonic-gate * 2250Sstevel@tonic-gate * Creates a new conn based on the type flag, inserts it into 2260Sstevel@tonic-gate * globalhash table. 2270Sstevel@tonic-gate * 2280Sstevel@tonic-gate * type: This flag determines the type of conn_t which needs to be 2295240Snordmark * created i.e., which kmem_cache it comes from. 2300Sstevel@tonic-gate * IPCL_TCPCONN indicates a TCP connection 2315240Snordmark * IPCL_SCTPCONN indicates a SCTP connection 2325240Snordmark * IPCL_UDPCONN indicates a UDP conn_t. 2335240Snordmark * IPCL_RAWIPCONN indicates a RAWIP/ICMP conn_t. 2345240Snordmark * IPCL_RTSCONN indicates a RTS conn_t. 2355240Snordmark * IPCL_IPCCONN indicates all other connections. 2360Sstevel@tonic-gate * 2370Sstevel@tonic-gate * void ipcl_conn_destroy(connp) 2380Sstevel@tonic-gate * 2390Sstevel@tonic-gate * Destroys the connection state, removes it from the global 2400Sstevel@tonic-gate * connection hash table and frees its memory. 
2410Sstevel@tonic-gate */ 2420Sstevel@tonic-gate 2430Sstevel@tonic-gate #include <sys/types.h> 2440Sstevel@tonic-gate #include <sys/stream.h> 2450Sstevel@tonic-gate #include <sys/stropts.h> 2460Sstevel@tonic-gate #include <sys/sysmacros.h> 2470Sstevel@tonic-gate #include <sys/strsubr.h> 2480Sstevel@tonic-gate #include <sys/strsun.h> 2490Sstevel@tonic-gate #define _SUN_TPI_VERSION 2 2500Sstevel@tonic-gate #include <sys/ddi.h> 2510Sstevel@tonic-gate #include <sys/cmn_err.h> 2520Sstevel@tonic-gate #include <sys/debug.h> 2530Sstevel@tonic-gate 2540Sstevel@tonic-gate #include <sys/systm.h> 2550Sstevel@tonic-gate #include <sys/param.h> 2560Sstevel@tonic-gate #include <sys/kmem.h> 2570Sstevel@tonic-gate #include <sys/isa_defs.h> 2580Sstevel@tonic-gate #include <inet/common.h> 2590Sstevel@tonic-gate #include <netinet/ip6.h> 2600Sstevel@tonic-gate #include <netinet/icmp6.h> 2610Sstevel@tonic-gate 2620Sstevel@tonic-gate #include <inet/ip.h> 2630Sstevel@tonic-gate #include <inet/ip6.h> 2640Sstevel@tonic-gate #include <inet/tcp.h> 2650Sstevel@tonic-gate #include <inet/ip_ndp.h> 266741Smasputra #include <inet/udp_impl.h> 2670Sstevel@tonic-gate #include <inet/sctp_ip.h> 2683448Sdh155122 #include <inet/sctp/sctp_impl.h> 2695240Snordmark #include <inet/rawip_impl.h> 2705240Snordmark #include <inet/rts_impl.h> 2710Sstevel@tonic-gate 2720Sstevel@tonic-gate #include <sys/cpuvar.h> 2730Sstevel@tonic-gate 2740Sstevel@tonic-gate #include <inet/ipclassifier.h> 2750Sstevel@tonic-gate #include <inet/ipsec_impl.h> 2760Sstevel@tonic-gate 2771676Sjpk #include <sys/tsol/tnet.h> 2781676Sjpk 2790Sstevel@tonic-gate #ifdef DEBUG 2800Sstevel@tonic-gate #define IPCL_DEBUG 2810Sstevel@tonic-gate #else 2820Sstevel@tonic-gate #undef IPCL_DEBUG 2830Sstevel@tonic-gate #endif 2840Sstevel@tonic-gate 2850Sstevel@tonic-gate #ifdef IPCL_DEBUG 2860Sstevel@tonic-gate int ipcl_debug_level = 0; 2870Sstevel@tonic-gate #define IPCL_DEBUG_LVL(level, args) \ 2880Sstevel@tonic-gate if (ipcl_debug_level & level) { 
printf args; } 2890Sstevel@tonic-gate #else 2900Sstevel@tonic-gate #define IPCL_DEBUG_LVL(level, args) {; } 2910Sstevel@tonic-gate #endif 2923448Sdh155122 /* Old value for compatibility. Setable in /etc/system */ 2930Sstevel@tonic-gate uint_t tcp_conn_hash_size = 0; 2940Sstevel@tonic-gate 2953448Sdh155122 /* New value. Zero means choose automatically. Setable in /etc/system */ 2960Sstevel@tonic-gate uint_t ipcl_conn_hash_size = 0; 2970Sstevel@tonic-gate uint_t ipcl_conn_hash_memfactor = 8192; 2980Sstevel@tonic-gate uint_t ipcl_conn_hash_maxsize = 82500; 2990Sstevel@tonic-gate 3000Sstevel@tonic-gate /* bind/udp fanout table size */ 3010Sstevel@tonic-gate uint_t ipcl_bind_fanout_size = 512; 3021503Sericheng uint_t ipcl_udp_fanout_size = 16384; 3030Sstevel@tonic-gate 3040Sstevel@tonic-gate /* Raw socket fanout size. Must be a power of 2. */ 3050Sstevel@tonic-gate uint_t ipcl_raw_fanout_size = 256; 3060Sstevel@tonic-gate 3070Sstevel@tonic-gate /* 3080Sstevel@tonic-gate * Power of 2^N Primes useful for hashing for N of 0-28, 3090Sstevel@tonic-gate * these primes are the nearest prime <= 2^N - 2^(N-2). 3100Sstevel@tonic-gate */ 3110Sstevel@tonic-gate 3120Sstevel@tonic-gate #define P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067, \ 3130Sstevel@tonic-gate 6143, 12281, 24571, 49139, 98299, 196597, 393209, \ 3140Sstevel@tonic-gate 786431, 1572853, 3145721, 6291449, 12582893, 25165813, \ 3150Sstevel@tonic-gate 50331599, 100663291, 201326557, 0} 3160Sstevel@tonic-gate 3170Sstevel@tonic-gate /* 3185240Snordmark * wrapper structure to ensure that conn and what follows it (tcp_t, etc) 3195240Snordmark * are aligned on cache lines. 
3200Sstevel@tonic-gate */ 3215240Snordmark typedef union itc_s { 3225240Snordmark conn_t itc_conn; 3235240Snordmark char itcu_filler[CACHE_ALIGN(conn_s)]; 3240Sstevel@tonic-gate } itc_t; 3250Sstevel@tonic-gate 3265240Snordmark struct kmem_cache *tcp_conn_cache; 3275240Snordmark struct kmem_cache *ip_conn_cache; 3280Sstevel@tonic-gate extern struct kmem_cache *sctp_conn_cache; 3290Sstevel@tonic-gate extern struct kmem_cache *tcp_sack_info_cache; 3300Sstevel@tonic-gate extern struct kmem_cache *tcp_iphc_cache; 3315240Snordmark struct kmem_cache *udp_conn_cache; 3325240Snordmark struct kmem_cache *rawip_conn_cache; 3335240Snordmark struct kmem_cache *rts_conn_cache; 3340Sstevel@tonic-gate 3350Sstevel@tonic-gate extern void tcp_timermp_free(tcp_t *); 3360Sstevel@tonic-gate extern mblk_t *tcp_timermp_alloc(int); 3370Sstevel@tonic-gate 3385240Snordmark static int ip_conn_constructor(void *, void *, int); 3395240Snordmark static void ip_conn_destructor(void *, void *); 3405240Snordmark 3415240Snordmark static int tcp_conn_constructor(void *, void *, int); 3425240Snordmark static void tcp_conn_destructor(void *, void *); 3435240Snordmark 3445240Snordmark static int udp_conn_constructor(void *, void *, int); 3455240Snordmark static void udp_conn_destructor(void *, void *); 3465240Snordmark 3475240Snordmark static int rawip_conn_constructor(void *, void *, int); 3485240Snordmark static void rawip_conn_destructor(void *, void *); 3495240Snordmark 3505240Snordmark static int rts_conn_constructor(void *, void *, int); 3515240Snordmark static void rts_conn_destructor(void *, void *); 3520Sstevel@tonic-gate 3530Sstevel@tonic-gate #ifdef IPCL_DEBUG 3540Sstevel@tonic-gate #define INET_NTOA_BUFSIZE 18 3550Sstevel@tonic-gate 3560Sstevel@tonic-gate static char * 3570Sstevel@tonic-gate inet_ntoa_r(uint32_t in, char *b) 3580Sstevel@tonic-gate { 3590Sstevel@tonic-gate unsigned char *p; 3600Sstevel@tonic-gate 3610Sstevel@tonic-gate p = (unsigned char *)∈ 3620Sstevel@tonic-gate (void) 
sprintf(b, "%d.%d.%d.%d", p[0], p[1], p[2], p[3]); 3630Sstevel@tonic-gate return (b); 3640Sstevel@tonic-gate } 3650Sstevel@tonic-gate #endif 3660Sstevel@tonic-gate 3670Sstevel@tonic-gate /* 3683448Sdh155122 * Global (for all stack instances) init routine 3690Sstevel@tonic-gate */ 3700Sstevel@tonic-gate void 3713448Sdh155122 ipcl_g_init(void) 3720Sstevel@tonic-gate { 3735240Snordmark ip_conn_cache = kmem_cache_create("ip_conn_cache", 3740Sstevel@tonic-gate sizeof (conn_t), CACHE_ALIGN_SIZE, 3755240Snordmark ip_conn_constructor, ip_conn_destructor, 3765240Snordmark NULL, NULL, NULL, 0); 3775240Snordmark 3785240Snordmark tcp_conn_cache = kmem_cache_create("tcp_conn_cache", 3795240Snordmark sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE, 3805240Snordmark tcp_conn_constructor, tcp_conn_destructor, 3815240Snordmark NULL, NULL, NULL, 0); 3820Sstevel@tonic-gate 3835240Snordmark udp_conn_cache = kmem_cache_create("udp_conn_cache", 3845240Snordmark sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE, 3855240Snordmark udp_conn_constructor, udp_conn_destructor, 3865240Snordmark NULL, NULL, NULL, 0); 3875240Snordmark 3885240Snordmark rawip_conn_cache = kmem_cache_create("rawip_conn_cache", 3895240Snordmark sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE, 3905240Snordmark rawip_conn_constructor, rawip_conn_destructor, 3915240Snordmark NULL, NULL, NULL, 0); 3925240Snordmark 3935240Snordmark rts_conn_cache = kmem_cache_create("rts_conn_cache", 3945240Snordmark sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE, 3955240Snordmark rts_conn_constructor, rts_conn_destructor, 3960Sstevel@tonic-gate NULL, NULL, NULL, 0); 3973448Sdh155122 } 3983448Sdh155122 3993448Sdh155122 /* 4003448Sdh155122 * ipclassifier intialization routine, sets up hash tables. 
4013448Sdh155122 */ 4023448Sdh155122 void 4033448Sdh155122 ipcl_init(ip_stack_t *ipst) 4043448Sdh155122 { 4053448Sdh155122 int i; 4063448Sdh155122 int sizes[] = P2Ps(); 4070Sstevel@tonic-gate 4080Sstevel@tonic-gate /* 4093448Sdh155122 * Calculate size of conn fanout table from /etc/system settings 4100Sstevel@tonic-gate */ 4110Sstevel@tonic-gate if (ipcl_conn_hash_size != 0) { 4123448Sdh155122 ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size; 4130Sstevel@tonic-gate } else if (tcp_conn_hash_size != 0) { 4143448Sdh155122 ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size; 4150Sstevel@tonic-gate } else { 4160Sstevel@tonic-gate extern pgcnt_t freemem; 4170Sstevel@tonic-gate 4183448Sdh155122 ipst->ips_ipcl_conn_fanout_size = 4190Sstevel@tonic-gate (freemem * PAGESIZE) / ipcl_conn_hash_memfactor; 4200Sstevel@tonic-gate 4213448Sdh155122 if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) { 4223448Sdh155122 ipst->ips_ipcl_conn_fanout_size = 4233448Sdh155122 ipcl_conn_hash_maxsize; 4243448Sdh155122 } 4250Sstevel@tonic-gate } 4260Sstevel@tonic-gate 4270Sstevel@tonic-gate for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) { 4283448Sdh155122 if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) { 4290Sstevel@tonic-gate break; 4300Sstevel@tonic-gate } 4310Sstevel@tonic-gate } 4323448Sdh155122 if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) { 4330Sstevel@tonic-gate /* Out of range, use the 2^16 value */ 4343448Sdh155122 ipst->ips_ipcl_conn_fanout_size = sizes[16]; 4350Sstevel@tonic-gate } 4363448Sdh155122 4373448Sdh155122 /* Take values from /etc/system */ 4383448Sdh155122 ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size; 4393448Sdh155122 ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size; 4403448Sdh155122 ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size; 4410Sstevel@tonic-gate 4423448Sdh155122 ASSERT(ipst->ips_ipcl_conn_fanout == NULL); 4433448Sdh155122 4443448Sdh155122 ipst->ips_ipcl_conn_fanout = kmem_zalloc( 4453448Sdh155122 
ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP); 4463448Sdh155122 4473448Sdh155122 for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) { 4483448Sdh155122 mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL, 4490Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4500Sstevel@tonic-gate } 4510Sstevel@tonic-gate 4523448Sdh155122 ipst->ips_ipcl_bind_fanout = kmem_zalloc( 4533448Sdh155122 ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP); 4540Sstevel@tonic-gate 4553448Sdh155122 for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) { 4563448Sdh155122 mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL, 4570Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4580Sstevel@tonic-gate } 4590Sstevel@tonic-gate 4603448Sdh155122 ipst->ips_ipcl_proto_fanout = kmem_zalloc(IPPROTO_MAX * 4613448Sdh155122 sizeof (connf_t), KM_SLEEP); 4623448Sdh155122 for (i = 0; i < IPPROTO_MAX; i++) { 4633448Sdh155122 mutex_init(&ipst->ips_ipcl_proto_fanout[i].connf_lock, NULL, 4640Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4650Sstevel@tonic-gate } 4663448Sdh155122 4673448Sdh155122 ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX * 4683448Sdh155122 sizeof (connf_t), KM_SLEEP); 4693448Sdh155122 for (i = 0; i < IPPROTO_MAX; i++) { 4703448Sdh155122 mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL, 4710Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4720Sstevel@tonic-gate } 4730Sstevel@tonic-gate 4743448Sdh155122 ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP); 4753448Sdh155122 mutex_init(&ipst->ips_rts_clients->connf_lock, 4763448Sdh155122 NULL, MUTEX_DEFAULT, NULL); 4770Sstevel@tonic-gate 4783448Sdh155122 ipst->ips_ipcl_udp_fanout = kmem_zalloc( 4793448Sdh155122 ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP); 4803448Sdh155122 for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) { 4813448Sdh155122 mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL, 4820Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4830Sstevel@tonic-gate } 
4840Sstevel@tonic-gate 4853448Sdh155122 ipst->ips_ipcl_raw_fanout = kmem_zalloc( 4863448Sdh155122 ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP); 4873448Sdh155122 for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { 4883448Sdh155122 mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL, 4890Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4900Sstevel@tonic-gate } 4910Sstevel@tonic-gate 4923448Sdh155122 ipst->ips_ipcl_globalhash_fanout = kmem_zalloc( 4933448Sdh155122 sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP); 4940Sstevel@tonic-gate for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4953448Sdh155122 mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock, 4963448Sdh155122 NULL, MUTEX_DEFAULT, NULL); 4970Sstevel@tonic-gate } 4980Sstevel@tonic-gate } 4990Sstevel@tonic-gate 5000Sstevel@tonic-gate void 5013448Sdh155122 ipcl_g_destroy(void) 5020Sstevel@tonic-gate { 5035240Snordmark kmem_cache_destroy(ip_conn_cache); 5045240Snordmark kmem_cache_destroy(tcp_conn_cache); 5055240Snordmark kmem_cache_destroy(udp_conn_cache); 5065240Snordmark kmem_cache_destroy(rawip_conn_cache); 5075240Snordmark kmem_cache_destroy(rts_conn_cache); 5083448Sdh155122 } 5093448Sdh155122 5103448Sdh155122 /* 5113448Sdh155122 * All user-level and kernel use of the stack must be gone 5123448Sdh155122 * by now. 
5133448Sdh155122 */ 5143448Sdh155122 void 5153448Sdh155122 ipcl_destroy(ip_stack_t *ipst) 5163448Sdh155122 { 5173448Sdh155122 int i; 5183448Sdh155122 5193448Sdh155122 for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) { 5203448Sdh155122 ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL); 5213448Sdh155122 mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock); 5223448Sdh155122 } 5233448Sdh155122 kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size * 5243448Sdh155122 sizeof (connf_t)); 5253448Sdh155122 ipst->ips_ipcl_conn_fanout = NULL; 5263448Sdh155122 5273448Sdh155122 for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) { 5283448Sdh155122 ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL); 5293448Sdh155122 mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock); 5303448Sdh155122 } 5313448Sdh155122 kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size * 5323448Sdh155122 sizeof (connf_t)); 5333448Sdh155122 ipst->ips_ipcl_bind_fanout = NULL; 5343448Sdh155122 5353448Sdh155122 for (i = 0; i < IPPROTO_MAX; i++) { 5363448Sdh155122 ASSERT(ipst->ips_ipcl_proto_fanout[i].connf_head == NULL); 5373448Sdh155122 mutex_destroy(&ipst->ips_ipcl_proto_fanout[i].connf_lock); 5383448Sdh155122 } 5393448Sdh155122 kmem_free(ipst->ips_ipcl_proto_fanout, IPPROTO_MAX * sizeof (connf_t)); 5403448Sdh155122 ipst->ips_ipcl_proto_fanout = NULL; 5410Sstevel@tonic-gate 5423448Sdh155122 for (i = 0; i < IPPROTO_MAX; i++) { 5433448Sdh155122 ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL); 5443448Sdh155122 mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock); 5453448Sdh155122 } 5463448Sdh155122 kmem_free(ipst->ips_ipcl_proto_fanout_v6, 5473448Sdh155122 IPPROTO_MAX * sizeof (connf_t)); 5483448Sdh155122 ipst->ips_ipcl_proto_fanout_v6 = NULL; 5493448Sdh155122 5503448Sdh155122 for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) { 5513448Sdh155122 ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL); 5523448Sdh155122 
mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock); 5533448Sdh155122 } 5543448Sdh155122 kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size * 5553448Sdh155122 sizeof (connf_t)); 5563448Sdh155122 ipst->ips_ipcl_udp_fanout = NULL; 5570Sstevel@tonic-gate 5583448Sdh155122 for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { 5593448Sdh155122 ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL); 5603448Sdh155122 mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock); 5613448Sdh155122 } 5623448Sdh155122 kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size * 5633448Sdh155122 sizeof (connf_t)); 5643448Sdh155122 ipst->ips_ipcl_raw_fanout = NULL; 5650Sstevel@tonic-gate 5663448Sdh155122 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5673448Sdh155122 ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL); 5683448Sdh155122 mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 5693448Sdh155122 } 5703448Sdh155122 kmem_free(ipst->ips_ipcl_globalhash_fanout, 5713448Sdh155122 sizeof (connf_t) * CONN_G_HASH_SIZE); 5723448Sdh155122 ipst->ips_ipcl_globalhash_fanout = NULL; 5730Sstevel@tonic-gate 5743448Sdh155122 ASSERT(ipst->ips_rts_clients->connf_head == NULL); 5753448Sdh155122 mutex_destroy(&ipst->ips_rts_clients->connf_lock); 5763448Sdh155122 kmem_free(ipst->ips_rts_clients, sizeof (connf_t)); 5773448Sdh155122 ipst->ips_rts_clients = NULL; 5780Sstevel@tonic-gate } 5790Sstevel@tonic-gate 5800Sstevel@tonic-gate /* 5810Sstevel@tonic-gate * conn creation routine. initialize the conn, sets the reference 5820Sstevel@tonic-gate * and inserts it in the global hash table. 
5830Sstevel@tonic-gate */ 5840Sstevel@tonic-gate conn_t * 5853448Sdh155122 ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns) 5860Sstevel@tonic-gate { 5870Sstevel@tonic-gate conn_t *connp; 5883448Sdh155122 sctp_stack_t *sctps; 5895240Snordmark struct kmem_cache *conn_cache; 5900Sstevel@tonic-gate 5910Sstevel@tonic-gate switch (type) { 5920Sstevel@tonic-gate case IPCL_SCTPCONN: 5930Sstevel@tonic-gate if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL) 5940Sstevel@tonic-gate return (NULL); 5954691Skcpoon sctp_conn_init(connp); 5963448Sdh155122 sctps = ns->netstack_sctp; 5973448Sdh155122 SCTP_G_Q_REFHOLD(sctps); 5983448Sdh155122 netstack_hold(ns); 5993448Sdh155122 connp->conn_netstack = ns; 6005240Snordmark return (connp); 6015240Snordmark 6025240Snordmark case IPCL_TCPCONN: 6035240Snordmark conn_cache = tcp_conn_cache; 6040Sstevel@tonic-gate break; 6055240Snordmark 6065240Snordmark case IPCL_UDPCONN: 6075240Snordmark conn_cache = udp_conn_cache; 6085240Snordmark break; 6095240Snordmark 6105240Snordmark case IPCL_RAWIPCONN: 6115240Snordmark conn_cache = rawip_conn_cache; 6125240Snordmark break; 6135240Snordmark 6145240Snordmark case IPCL_RTSCONN: 6155240Snordmark conn_cache = rts_conn_cache; 6165240Snordmark break; 6175240Snordmark 6180Sstevel@tonic-gate case IPCL_IPCCONN: 6195240Snordmark conn_cache = ip_conn_cache; 6200Sstevel@tonic-gate break; 6215240Snordmark 622741Smasputra default: 623741Smasputra connp = NULL; 624741Smasputra ASSERT(0); 6250Sstevel@tonic-gate } 6260Sstevel@tonic-gate 6275240Snordmark if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL) 6285240Snordmark return (NULL); 6295240Snordmark 6305240Snordmark connp->conn_ref = 1; 6315240Snordmark netstack_hold(ns); 6325240Snordmark connp->conn_netstack = ns; 6335240Snordmark ipcl_globalhash_insert(connp); 6340Sstevel@tonic-gate return (connp); 6350Sstevel@tonic-gate } 6360Sstevel@tonic-gate 6370Sstevel@tonic-gate void 6380Sstevel@tonic-gate ipcl_conn_destroy(conn_t *connp) 
6390Sstevel@tonic-gate { 6400Sstevel@tonic-gate mblk_t *mp; 6413448Sdh155122 netstack_t *ns = connp->conn_netstack; 6420Sstevel@tonic-gate 6430Sstevel@tonic-gate ASSERT(!MUTEX_HELD(&connp->conn_lock)); 6440Sstevel@tonic-gate ASSERT(connp->conn_ref == 0); 6450Sstevel@tonic-gate ASSERT(connp->conn_ire_cache == NULL); 6460Sstevel@tonic-gate 6477502Saruna@cs.umn.edu DTRACE_PROBE1(conn__destroy, conn_t *, connp); 6487502Saruna@cs.umn.edu 6491676Sjpk if (connp->conn_peercred != NULL && 6501676Sjpk connp->conn_peercred != connp->conn_cred) 6511676Sjpk crfree(connp->conn_peercred); 6521676Sjpk connp->conn_peercred = NULL; 6531676Sjpk 6541676Sjpk if (connp->conn_cred != NULL) { 6551676Sjpk crfree(connp->conn_cred); 6561676Sjpk connp->conn_cred = NULL; 6571676Sjpk } 6581676Sjpk 6590Sstevel@tonic-gate ipcl_globalhash_remove(connp); 6600Sstevel@tonic-gate 6615240Snordmark /* FIXME: add separate tcp_conn_free()? */ 6620Sstevel@tonic-gate if (connp->conn_flags & IPCL_TCPCONN) { 663741Smasputra tcp_t *tcp = connp->conn_tcp; 6643448Sdh155122 tcp_stack_t *tcps; 6653448Sdh155122 6663448Sdh155122 ASSERT(tcp != NULL); 6673448Sdh155122 tcps = tcp->tcp_tcps; 6683448Sdh155122 if (tcps != NULL) { 6693448Sdh155122 if (connp->conn_latch != NULL) { 6703448Sdh155122 IPLATCH_REFRELE(connp->conn_latch, ns); 6713448Sdh155122 connp->conn_latch = NULL; 6723448Sdh155122 } 6733448Sdh155122 if (connp->conn_policy != NULL) { 6743448Sdh155122 IPPH_REFRELE(connp->conn_policy, ns); 6753448Sdh155122 connp->conn_policy = NULL; 6763448Sdh155122 } 6773448Sdh155122 tcp->tcp_tcps = NULL; 6783448Sdh155122 TCPS_REFRELE(tcps); 6793448Sdh155122 } 680741Smasputra 6810Sstevel@tonic-gate tcp_free(tcp); 6820Sstevel@tonic-gate mp = tcp->tcp_timercache; 6831676Sjpk tcp->tcp_cred = NULL; 6840Sstevel@tonic-gate 6850Sstevel@tonic-gate if (tcp->tcp_sack_info != NULL) { 6860Sstevel@tonic-gate bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t)); 6870Sstevel@tonic-gate kmem_cache_free(tcp_sack_info_cache, 
6880Sstevel@tonic-gate tcp->tcp_sack_info); 6890Sstevel@tonic-gate } 6900Sstevel@tonic-gate if (tcp->tcp_iphc != NULL) { 6910Sstevel@tonic-gate if (tcp->tcp_hdr_grown) { 6920Sstevel@tonic-gate kmem_free(tcp->tcp_iphc, tcp->tcp_iphc_len); 6930Sstevel@tonic-gate } else { 6940Sstevel@tonic-gate bzero(tcp->tcp_iphc, tcp->tcp_iphc_len); 6950Sstevel@tonic-gate kmem_cache_free(tcp_iphc_cache, tcp->tcp_iphc); 6960Sstevel@tonic-gate } 6970Sstevel@tonic-gate tcp->tcp_iphc_len = 0; 6980Sstevel@tonic-gate } 6990Sstevel@tonic-gate ASSERT(tcp->tcp_iphc_len == 0); 7000Sstevel@tonic-gate 701*8014SKacheong.Poon@Sun.COM if (tcp->tcp_ordrel_mp != NULL) { 702*8014SKacheong.Poon@Sun.COM freeb(tcp->tcp_ordrel_mp); 703*8014SKacheong.Poon@Sun.COM tcp->tcp_ordrel_mp = NULL; 704*8014SKacheong.Poon@Sun.COM } 705*8014SKacheong.Poon@Sun.COM 706*8014SKacheong.Poon@Sun.COM /* 707*8014SKacheong.Poon@Sun.COM * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate 708*8014SKacheong.Poon@Sun.COM * the mblk. 709*8014SKacheong.Poon@Sun.COM */ 710*8014SKacheong.Poon@Sun.COM if (tcp->tcp_rsrv_mp != NULL) { 711*8014SKacheong.Poon@Sun.COM freeb(tcp->tcp_rsrv_mp); 712*8014SKacheong.Poon@Sun.COM tcp->tcp_rsrv_mp = NULL; 713*8014SKacheong.Poon@Sun.COM mutex_destroy(&tcp->tcp_rsrv_mp_lock); 714*8014SKacheong.Poon@Sun.COM } 715*8014SKacheong.Poon@Sun.COM 7163448Sdh155122 ASSERT(connp->conn_latch == NULL); 7173448Sdh155122 ASSERT(connp->conn_policy == NULL); 7183448Sdh155122 7193448Sdh155122 if (ns != NULL) { 7203448Sdh155122 ASSERT(tcp->tcp_tcps == NULL); 7213448Sdh155122 connp->conn_netstack = NULL; 7223448Sdh155122 netstack_rele(ns); 7233448Sdh155122 } 7245240Snordmark 7255240Snordmark ipcl_conn_cleanup(connp); 7265240Snordmark connp->conn_flags = IPCL_TCPCONN; 7275240Snordmark bzero(tcp, sizeof (tcp_t)); 7285240Snordmark 7295240Snordmark tcp->tcp_timercache = mp; 7305240Snordmark tcp->tcp_connp = connp; 7315240Snordmark kmem_cache_free(tcp_conn_cache, connp); 7325240Snordmark return; 7335240Snordmark 
} 7345240Snordmark if (connp->conn_latch != NULL) { 7355240Snordmark IPLATCH_REFRELE(connp->conn_latch, connp->conn_netstack); 7365240Snordmark connp->conn_latch = NULL; 7375240Snordmark } 7385240Snordmark if (connp->conn_policy != NULL) { 7395240Snordmark IPPH_REFRELE(connp->conn_policy, connp->conn_netstack); 7405240Snordmark connp->conn_policy = NULL; 7415240Snordmark } 7425240Snordmark if (connp->conn_ipsec_opt_mp != NULL) { 7435240Snordmark freemsg(connp->conn_ipsec_opt_mp); 7445240Snordmark connp->conn_ipsec_opt_mp = NULL; 7455240Snordmark } 7465240Snordmark 7475240Snordmark if (connp->conn_flags & IPCL_SCTPCONN) { 7483448Sdh155122 ASSERT(ns != NULL); 7490Sstevel@tonic-gate sctp_free(connp); 7505240Snordmark return; 7515240Snordmark } 7525240Snordmark 7535240Snordmark if (ns != NULL) { 7545240Snordmark connp->conn_netstack = NULL; 7555240Snordmark netstack_rele(ns); 7565240Snordmark } 7575240Snordmark ipcl_conn_cleanup(connp); 7585240Snordmark 7595240Snordmark /* leave conn_priv aka conn_udp, conn_icmp, etc in place. 
*/ 7605240Snordmark if (connp->conn_flags & IPCL_UDPCONN) { 7615240Snordmark connp->conn_flags = IPCL_UDPCONN; 7625240Snordmark kmem_cache_free(udp_conn_cache, connp); 7635240Snordmark } else if (connp->conn_flags & IPCL_RAWIPCONN) { 7645240Snordmark connp->conn_flags = IPCL_RAWIPCONN; 7655240Snordmark connp->conn_ulp = IPPROTO_ICMP; 7665240Snordmark kmem_cache_free(rawip_conn_cache, connp); 7675240Snordmark } else if (connp->conn_flags & IPCL_RTSCONN) { 7685240Snordmark connp->conn_flags = IPCL_RTSCONN; 7695240Snordmark kmem_cache_free(rts_conn_cache, connp); 7700Sstevel@tonic-gate } else { 7715240Snordmark connp->conn_flags = IPCL_IPCCONN; 7725240Snordmark ASSERT(connp->conn_flags & IPCL_IPCCONN); 7735240Snordmark ASSERT(connp->conn_priv == NULL); 7745240Snordmark kmem_cache_free(ip_conn_cache, connp); 7750Sstevel@tonic-gate } 7760Sstevel@tonic-gate } 7770Sstevel@tonic-gate 7780Sstevel@tonic-gate /* 7790Sstevel@tonic-gate * Running in cluster mode - deregister listener information 7800Sstevel@tonic-gate */ 7810Sstevel@tonic-gate 7820Sstevel@tonic-gate static void 7830Sstevel@tonic-gate ipcl_conn_unlisten(conn_t *connp) 7840Sstevel@tonic-gate { 7850Sstevel@tonic-gate ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0); 7860Sstevel@tonic-gate ASSERT(connp->conn_lport != 0); 7870Sstevel@tonic-gate 7880Sstevel@tonic-gate if (cl_inet_unlisten != NULL) { 7890Sstevel@tonic-gate sa_family_t addr_family; 7900Sstevel@tonic-gate uint8_t *laddrp; 7910Sstevel@tonic-gate 7920Sstevel@tonic-gate if (connp->conn_pkt_isv6) { 7930Sstevel@tonic-gate addr_family = AF_INET6; 7940Sstevel@tonic-gate laddrp = (uint8_t *)&connp->conn_bound_source_v6; 7950Sstevel@tonic-gate } else { 7960Sstevel@tonic-gate addr_family = AF_INET; 7970Sstevel@tonic-gate laddrp = (uint8_t *)&connp->conn_bound_source; 7980Sstevel@tonic-gate } 7990Sstevel@tonic-gate (*cl_inet_unlisten)(IPPROTO_TCP, addr_family, laddrp, 8000Sstevel@tonic-gate connp->conn_lport); 8010Sstevel@tonic-gate } 8020Sstevel@tonic-gate 
connp->conn_flags &= ~IPCL_CL_LISTENER; 8030Sstevel@tonic-gate } 8040Sstevel@tonic-gate 8050Sstevel@tonic-gate /* 8060Sstevel@tonic-gate * We set the IPCL_REMOVED flag (instead of clearing the flag indicating 8070Sstevel@tonic-gate * which table the conn belonged to). So for debugging we can see which hash 8080Sstevel@tonic-gate * table this connection was in. 8090Sstevel@tonic-gate */ 8100Sstevel@tonic-gate #define IPCL_HASH_REMOVE(connp) { \ 8110Sstevel@tonic-gate connf_t *connfp = (connp)->conn_fanout; \ 8120Sstevel@tonic-gate ASSERT(!MUTEX_HELD(&((connp)->conn_lock))); \ 8130Sstevel@tonic-gate if (connfp != NULL) { \ 8140Sstevel@tonic-gate IPCL_DEBUG_LVL(4, ("IPCL_HASH_REMOVE: connp %p", \ 8150Sstevel@tonic-gate (void *)(connp))); \ 8160Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); \ 8170Sstevel@tonic-gate if ((connp)->conn_next != NULL) \ 8180Sstevel@tonic-gate (connp)->conn_next->conn_prev = \ 8190Sstevel@tonic-gate (connp)->conn_prev; \ 8200Sstevel@tonic-gate if ((connp)->conn_prev != NULL) \ 8210Sstevel@tonic-gate (connp)->conn_prev->conn_next = \ 8220Sstevel@tonic-gate (connp)->conn_next; \ 8230Sstevel@tonic-gate else \ 8240Sstevel@tonic-gate connfp->connf_head = (connp)->conn_next; \ 8250Sstevel@tonic-gate (connp)->conn_fanout = NULL; \ 8260Sstevel@tonic-gate (connp)->conn_next = NULL; \ 8270Sstevel@tonic-gate (connp)->conn_prev = NULL; \ 8280Sstevel@tonic-gate (connp)->conn_flags |= IPCL_REMOVED; \ 8290Sstevel@tonic-gate if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0) \ 8300Sstevel@tonic-gate ipcl_conn_unlisten((connp)); \ 8310Sstevel@tonic-gate CONN_DEC_REF((connp)); \ 8320Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); \ 8330Sstevel@tonic-gate } \ 8340Sstevel@tonic-gate } 8350Sstevel@tonic-gate 8360Sstevel@tonic-gate void 8370Sstevel@tonic-gate ipcl_hash_remove(conn_t *connp) 8380Sstevel@tonic-gate { 8390Sstevel@tonic-gate IPCL_HASH_REMOVE(connp); 8400Sstevel@tonic-gate } 8410Sstevel@tonic-gate 8420Sstevel@tonic-gate /* 
8430Sstevel@tonic-gate * The whole purpose of this function is allow removal of 8440Sstevel@tonic-gate * a conn_t from the connected hash for timewait reclaim. 8450Sstevel@tonic-gate * This is essentially a TW reclaim fastpath where timewait 8460Sstevel@tonic-gate * collector checks under fanout lock (so no one else can 8470Sstevel@tonic-gate * get access to the conn_t) that refcnt is 2 i.e. one for 8480Sstevel@tonic-gate * TCP and one for the classifier hash list. If ref count 8490Sstevel@tonic-gate * is indeed 2, we can just remove the conn under lock and 8500Sstevel@tonic-gate * avoid cleaning up the conn under squeue. This gives us 8510Sstevel@tonic-gate * improved performance. 8520Sstevel@tonic-gate */ 8530Sstevel@tonic-gate void 8540Sstevel@tonic-gate ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp) 8550Sstevel@tonic-gate { 8560Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connfp->connf_lock)); 8570Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock)); 8580Sstevel@tonic-gate ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0); 8590Sstevel@tonic-gate 8600Sstevel@tonic-gate if ((connp)->conn_next != NULL) { 8614691Skcpoon (connp)->conn_next->conn_prev = (connp)->conn_prev; 8620Sstevel@tonic-gate } 8630Sstevel@tonic-gate if ((connp)->conn_prev != NULL) { 8644691Skcpoon (connp)->conn_prev->conn_next = (connp)->conn_next; 8650Sstevel@tonic-gate } else { 8660Sstevel@tonic-gate connfp->connf_head = (connp)->conn_next; 8670Sstevel@tonic-gate } 8680Sstevel@tonic-gate (connp)->conn_fanout = NULL; 8690Sstevel@tonic-gate (connp)->conn_next = NULL; 8700Sstevel@tonic-gate (connp)->conn_prev = NULL; 8710Sstevel@tonic-gate (connp)->conn_flags |= IPCL_REMOVED; 8720Sstevel@tonic-gate ASSERT((connp)->conn_ref == 2); 8730Sstevel@tonic-gate (connp)->conn_ref--; 8740Sstevel@tonic-gate } 8750Sstevel@tonic-gate 8760Sstevel@tonic-gate #define IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) { \ 8770Sstevel@tonic-gate ASSERT((connp)->conn_fanout == NULL); \ 8780Sstevel@tonic-gate 
ASSERT((connp)->conn_next == NULL); \ 8790Sstevel@tonic-gate ASSERT((connp)->conn_prev == NULL); \ 8800Sstevel@tonic-gate if ((connfp)->connf_head != NULL) { \ 8810Sstevel@tonic-gate (connfp)->connf_head->conn_prev = (connp); \ 8820Sstevel@tonic-gate (connp)->conn_next = (connfp)->connf_head; \ 8830Sstevel@tonic-gate } \ 8840Sstevel@tonic-gate (connp)->conn_fanout = (connfp); \ 8850Sstevel@tonic-gate (connfp)->connf_head = (connp); \ 8860Sstevel@tonic-gate (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 8870Sstevel@tonic-gate IPCL_CONNECTED; \ 8880Sstevel@tonic-gate CONN_INC_REF(connp); \ 8890Sstevel@tonic-gate } 8900Sstevel@tonic-gate 8910Sstevel@tonic-gate #define IPCL_HASH_INSERT_CONNECTED(connfp, connp) { \ 8920Sstevel@tonic-gate IPCL_DEBUG_LVL(8, ("IPCL_HASH_INSERT_CONNECTED: connfp %p " \ 8930Sstevel@tonic-gate "connp %p", (void *)(connfp), (void *)(connp))); \ 8940Sstevel@tonic-gate IPCL_HASH_REMOVE((connp)); \ 8950Sstevel@tonic-gate mutex_enter(&(connfp)->connf_lock); \ 8960Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); \ 8970Sstevel@tonic-gate mutex_exit(&(connfp)->connf_lock); \ 8980Sstevel@tonic-gate } 8990Sstevel@tonic-gate 9000Sstevel@tonic-gate #define IPCL_HASH_INSERT_BOUND(connfp, connp) { \ 9010Sstevel@tonic-gate conn_t *pconnp = NULL, *nconnp; \ 9020Sstevel@tonic-gate IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_BOUND: connfp %p " \ 9030Sstevel@tonic-gate "connp %p", (void *)connfp, (void *)(connp))); \ 9040Sstevel@tonic-gate IPCL_HASH_REMOVE((connp)); \ 9050Sstevel@tonic-gate mutex_enter(&(connfp)->connf_lock); \ 9060Sstevel@tonic-gate nconnp = (connfp)->connf_head; \ 907153Sethindra while (nconnp != NULL && \ 908153Sethindra !_IPCL_V4_MATCH_ANY(nconnp->conn_srcv6)) { \ 909153Sethindra pconnp = nconnp; \ 910153Sethindra nconnp = nconnp->conn_next; \ 9110Sstevel@tonic-gate } \ 9120Sstevel@tonic-gate if (pconnp != NULL) { \ 9130Sstevel@tonic-gate pconnp->conn_next = (connp); \ 9140Sstevel@tonic-gate (connp)->conn_prev 
= pconnp; \ 9150Sstevel@tonic-gate } else { \ 9160Sstevel@tonic-gate (connfp)->connf_head = (connp); \ 9170Sstevel@tonic-gate } \ 9180Sstevel@tonic-gate if (nconnp != NULL) { \ 9190Sstevel@tonic-gate (connp)->conn_next = nconnp; \ 9200Sstevel@tonic-gate nconnp->conn_prev = (connp); \ 9210Sstevel@tonic-gate } \ 9220Sstevel@tonic-gate (connp)->conn_fanout = (connfp); \ 9230Sstevel@tonic-gate (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 9240Sstevel@tonic-gate IPCL_BOUND; \ 9250Sstevel@tonic-gate CONN_INC_REF(connp); \ 9260Sstevel@tonic-gate mutex_exit(&(connfp)->connf_lock); \ 9270Sstevel@tonic-gate } 9280Sstevel@tonic-gate 9290Sstevel@tonic-gate #define IPCL_HASH_INSERT_WILDCARD(connfp, connp) { \ 9300Sstevel@tonic-gate conn_t **list, *prev, *next; \ 9310Sstevel@tonic-gate boolean_t isv4mapped = \ 9320Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED(&(connp)->conn_srcv6); \ 9330Sstevel@tonic-gate IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_WILDCARD: connfp %p " \ 9340Sstevel@tonic-gate "connp %p", (void *)(connfp), (void *)(connp))); \ 9350Sstevel@tonic-gate IPCL_HASH_REMOVE((connp)); \ 9360Sstevel@tonic-gate mutex_enter(&(connfp)->connf_lock); \ 9370Sstevel@tonic-gate list = &(connfp)->connf_head; \ 9380Sstevel@tonic-gate prev = NULL; \ 9390Sstevel@tonic-gate while ((next = *list) != NULL) { \ 9400Sstevel@tonic-gate if (isv4mapped && \ 9410Sstevel@tonic-gate IN6_IS_ADDR_UNSPECIFIED(&next->conn_srcv6) && \ 9420Sstevel@tonic-gate connp->conn_zoneid == next->conn_zoneid) { \ 9430Sstevel@tonic-gate (connp)->conn_next = next; \ 9440Sstevel@tonic-gate if (prev != NULL) \ 9450Sstevel@tonic-gate prev = next->conn_prev; \ 9460Sstevel@tonic-gate next->conn_prev = (connp); \ 9470Sstevel@tonic-gate break; \ 9480Sstevel@tonic-gate } \ 9490Sstevel@tonic-gate list = &next->conn_next; \ 9500Sstevel@tonic-gate prev = next; \ 9510Sstevel@tonic-gate } \ 9520Sstevel@tonic-gate (connp)->conn_prev = prev; \ 9530Sstevel@tonic-gate *list = (connp); \ 9540Sstevel@tonic-gate 
(connp)->conn_fanout = (connfp); \ 9550Sstevel@tonic-gate (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 9560Sstevel@tonic-gate IPCL_BOUND; \ 9570Sstevel@tonic-gate CONN_INC_REF((connp)); \ 9580Sstevel@tonic-gate mutex_exit(&(connfp)->connf_lock); \ 9590Sstevel@tonic-gate } 9600Sstevel@tonic-gate 9610Sstevel@tonic-gate void 9620Sstevel@tonic-gate ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp) 9630Sstevel@tonic-gate { 9640Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 9650Sstevel@tonic-gate } 9660Sstevel@tonic-gate 9670Sstevel@tonic-gate void 9680Sstevel@tonic-gate ipcl_proto_insert(conn_t *connp, uint8_t protocol) 9690Sstevel@tonic-gate { 9700Sstevel@tonic-gate connf_t *connfp; 9713448Sdh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 9720Sstevel@tonic-gate 9730Sstevel@tonic-gate ASSERT(connp != NULL); 9741676Sjpk ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH || 9751676Sjpk protocol == IPPROTO_ESP); 9760Sstevel@tonic-gate 9770Sstevel@tonic-gate connp->conn_ulp = protocol; 9780Sstevel@tonic-gate 9790Sstevel@tonic-gate /* Insert it in the protocol hash */ 9803448Sdh155122 connfp = &ipst->ips_ipcl_proto_fanout[protocol]; 9810Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 9820Sstevel@tonic-gate } 9830Sstevel@tonic-gate 9840Sstevel@tonic-gate void 9850Sstevel@tonic-gate ipcl_proto_insert_v6(conn_t *connp, uint8_t protocol) 9860Sstevel@tonic-gate { 9870Sstevel@tonic-gate connf_t *connfp; 9883448Sdh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 9890Sstevel@tonic-gate 9900Sstevel@tonic-gate ASSERT(connp != NULL); 9911676Sjpk ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH || 9921676Sjpk protocol == IPPROTO_ESP); 9930Sstevel@tonic-gate 9940Sstevel@tonic-gate connp->conn_ulp = protocol; 9950Sstevel@tonic-gate 9960Sstevel@tonic-gate /* Insert it in the Bind Hash */ 9973448Sdh155122 connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; 9980Sstevel@tonic-gate 
IPCL_HASH_INSERT_WILDCARD(connfp, connp); 9990Sstevel@tonic-gate } 10000Sstevel@tonic-gate 10010Sstevel@tonic-gate /* 10020Sstevel@tonic-gate * This function is used only for inserting SCTP raw socket now. 10030Sstevel@tonic-gate * This may change later. 10040Sstevel@tonic-gate * 10050Sstevel@tonic-gate * Note that only one raw socket can be bound to a port. The param 10060Sstevel@tonic-gate * lport is in network byte order. 10070Sstevel@tonic-gate */ 10080Sstevel@tonic-gate static int 10090Sstevel@tonic-gate ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport) 10100Sstevel@tonic-gate { 10110Sstevel@tonic-gate connf_t *connfp; 10120Sstevel@tonic-gate conn_t *oconnp; 10133448Sdh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 10140Sstevel@tonic-gate 10153448Sdh155122 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)]; 10160Sstevel@tonic-gate 10170Sstevel@tonic-gate /* Check for existing raw socket already bound to the port. */ 10180Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 10190Sstevel@tonic-gate for (oconnp = connfp->connf_head; oconnp != NULL; 1020409Skcpoon oconnp = oconnp->conn_next) { 10210Sstevel@tonic-gate if (oconnp->conn_lport == lport && 10220Sstevel@tonic-gate oconnp->conn_zoneid == connp->conn_zoneid && 10230Sstevel@tonic-gate oconnp->conn_af_isv6 == connp->conn_af_isv6 && 10240Sstevel@tonic-gate ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) || 10250Sstevel@tonic-gate IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_srcv6) || 10260Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6) || 10270Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_srcv6)) || 10280Sstevel@tonic-gate IN6_ARE_ADDR_EQUAL(&oconnp->conn_srcv6, 10290Sstevel@tonic-gate &connp->conn_srcv6))) { 10300Sstevel@tonic-gate break; 10310Sstevel@tonic-gate } 10320Sstevel@tonic-gate } 10330Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 10340Sstevel@tonic-gate if (oconnp != NULL) 10350Sstevel@tonic-gate return (EADDRNOTAVAIL); 
10360Sstevel@tonic-gate 10370Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) || 10380Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_remv6)) { 10390Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) || 10400Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6)) { 10410Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 10420Sstevel@tonic-gate } else { 10430Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 10440Sstevel@tonic-gate } 10450Sstevel@tonic-gate } else { 10460Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 10470Sstevel@tonic-gate } 10480Sstevel@tonic-gate return (0); 10490Sstevel@tonic-gate } 10500Sstevel@tonic-gate 10510Sstevel@tonic-gate /* 10521676Sjpk * Check for a MAC exemption conflict on a labeled system. Note that for 10531676Sjpk * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the 10541676Sjpk * transport layer. This check is for binding all other protocols. 10551676Sjpk * 10561676Sjpk * Returns true if there's a conflict. 
10571676Sjpk */ 10581676Sjpk static boolean_t 10593448Sdh155122 check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst) 10601676Sjpk { 10611676Sjpk connf_t *connfp; 10621676Sjpk conn_t *tconn; 10631676Sjpk 10643448Sdh155122 connfp = &ipst->ips_ipcl_proto_fanout[connp->conn_ulp]; 10651676Sjpk mutex_enter(&connfp->connf_lock); 10661676Sjpk for (tconn = connfp->connf_head; tconn != NULL; 10671676Sjpk tconn = tconn->conn_next) { 10681676Sjpk /* We don't allow v4 fallback for v6 raw socket */ 10691676Sjpk if (connp->conn_af_isv6 != tconn->conn_af_isv6) 10701676Sjpk continue; 10711676Sjpk /* If neither is exempt, then there's no conflict */ 10721676Sjpk if (!connp->conn_mac_exempt && !tconn->conn_mac_exempt) 10731676Sjpk continue; 10741676Sjpk /* If both are bound to different specific addrs, ok */ 10751676Sjpk if (connp->conn_src != INADDR_ANY && 10761676Sjpk tconn->conn_src != INADDR_ANY && 10771676Sjpk connp->conn_src != tconn->conn_src) 10781676Sjpk continue; 10791676Sjpk /* These two conflict; fail */ 10801676Sjpk break; 10811676Sjpk } 10821676Sjpk mutex_exit(&connfp->connf_lock); 10831676Sjpk return (tconn != NULL); 10841676Sjpk } 10851676Sjpk 10861676Sjpk static boolean_t 10873448Sdh155122 check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst) 10881676Sjpk { 10891676Sjpk connf_t *connfp; 10901676Sjpk conn_t *tconn; 10911676Sjpk 10923448Sdh155122 connfp = &ipst->ips_ipcl_proto_fanout[connp->conn_ulp]; 10931676Sjpk mutex_enter(&connfp->connf_lock); 10941676Sjpk for (tconn = connfp->connf_head; tconn != NULL; 10951676Sjpk tconn = tconn->conn_next) { 10961676Sjpk /* We don't allow v4 fallback for v6 raw socket */ 10971676Sjpk if (connp->conn_af_isv6 != tconn->conn_af_isv6) 10981676Sjpk continue; 10991676Sjpk /* If neither is exempt, then there's no conflict */ 11001676Sjpk if (!connp->conn_mac_exempt && !tconn->conn_mac_exempt) 11011676Sjpk continue; 11021676Sjpk /* If both are bound to different addrs, ok */ 11031676Sjpk if 
(!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) && 11041676Sjpk !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_srcv6) && 11051676Sjpk !IN6_ARE_ADDR_EQUAL(&connp->conn_srcv6, &tconn->conn_srcv6)) 11061676Sjpk continue; 11071676Sjpk /* These two conflict; fail */ 11081676Sjpk break; 11091676Sjpk } 11101676Sjpk mutex_exit(&connfp->connf_lock); 11111676Sjpk return (tconn != NULL); 11121676Sjpk } 11131676Sjpk 11141676Sjpk /* 11150Sstevel@tonic-gate * (v4, v6) bind hash insertion routines 11160Sstevel@tonic-gate */ 11170Sstevel@tonic-gate int 11180Sstevel@tonic-gate ipcl_bind_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, uint16_t lport) 11190Sstevel@tonic-gate { 11200Sstevel@tonic-gate connf_t *connfp; 11210Sstevel@tonic-gate #ifdef IPCL_DEBUG 11220Sstevel@tonic-gate char buf[INET_NTOA_BUFSIZE]; 11230Sstevel@tonic-gate #endif 11240Sstevel@tonic-gate int ret = 0; 11253448Sdh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 11260Sstevel@tonic-gate 11270Sstevel@tonic-gate ASSERT(connp); 11280Sstevel@tonic-gate 11290Sstevel@tonic-gate IPCL_DEBUG_LVL(64, ("ipcl_bind_insert: connp %p, src = %s, " 11300Sstevel@tonic-gate "port = %d\n", (void *)connp, inet_ntoa_r(src, buf), lport)); 11310Sstevel@tonic-gate 11320Sstevel@tonic-gate connp->conn_ulp = protocol; 11330Sstevel@tonic-gate IN6_IPADDR_TO_V4MAPPED(src, &connp->conn_srcv6); 11340Sstevel@tonic-gate connp->conn_lport = lport; 11350Sstevel@tonic-gate 11360Sstevel@tonic-gate switch (protocol) { 11371676Sjpk default: 11383448Sdh155122 if (is_system_labeled() && 11393448Sdh155122 check_exempt_conflict_v4(connp, ipst)) 11401676Sjpk return (EADDRINUSE); 11411676Sjpk /* FALLTHROUGH */ 11420Sstevel@tonic-gate case IPPROTO_UDP: 11430Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 11440Sstevel@tonic-gate IPCL_DEBUG_LVL(64, 11450Sstevel@tonic-gate ("ipcl_bind_insert: connp %p - udp\n", 11460Sstevel@tonic-gate (void *)connp)); 11473448Sdh155122 connfp = &ipst->ips_ipcl_udp_fanout[ 11483448Sdh155122 IPCL_UDP_HASH(lport, ipst)]; 
11490Sstevel@tonic-gate } else { 11500Sstevel@tonic-gate IPCL_DEBUG_LVL(64, 11510Sstevel@tonic-gate ("ipcl_bind_insert: connp %p - protocol\n", 11520Sstevel@tonic-gate (void *)connp)); 11533448Sdh155122 connfp = &ipst->ips_ipcl_proto_fanout[protocol]; 11540Sstevel@tonic-gate } 11550Sstevel@tonic-gate 11560Sstevel@tonic-gate if (connp->conn_rem != INADDR_ANY) { 11570Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 11580Sstevel@tonic-gate } else if (connp->conn_src != INADDR_ANY) { 11590Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 11600Sstevel@tonic-gate } else { 11610Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 11620Sstevel@tonic-gate } 11630Sstevel@tonic-gate break; 11640Sstevel@tonic-gate 11650Sstevel@tonic-gate case IPPROTO_TCP: 11660Sstevel@tonic-gate 11670Sstevel@tonic-gate /* Insert it in the Bind Hash */ 11681676Sjpk ASSERT(connp->conn_zoneid != ALL_ZONES); 11693448Sdh155122 connfp = &ipst->ips_ipcl_bind_fanout[ 11703448Sdh155122 IPCL_BIND_HASH(lport, ipst)]; 11710Sstevel@tonic-gate if (connp->conn_src != INADDR_ANY) { 11720Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 11730Sstevel@tonic-gate } else { 11740Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 11750Sstevel@tonic-gate } 11760Sstevel@tonic-gate if (cl_inet_listen != NULL) { 11770Sstevel@tonic-gate ASSERT(!connp->conn_pkt_isv6); 11780Sstevel@tonic-gate connp->conn_flags |= IPCL_CL_LISTENER; 11790Sstevel@tonic-gate (*cl_inet_listen)(IPPROTO_TCP, AF_INET, 11800Sstevel@tonic-gate (uint8_t *)&connp->conn_bound_source, lport); 11810Sstevel@tonic-gate } 11820Sstevel@tonic-gate break; 11830Sstevel@tonic-gate 11840Sstevel@tonic-gate case IPPROTO_SCTP: 11850Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 11860Sstevel@tonic-gate break; 11870Sstevel@tonic-gate } 11880Sstevel@tonic-gate 11890Sstevel@tonic-gate return (ret); 11900Sstevel@tonic-gate } 11910Sstevel@tonic-gate 11920Sstevel@tonic-gate int 11930Sstevel@tonic-gate 
ipcl_bind_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, 11940Sstevel@tonic-gate uint16_t lport) 11950Sstevel@tonic-gate { 11960Sstevel@tonic-gate connf_t *connfp; 11970Sstevel@tonic-gate int ret = 0; 11983448Sdh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 11990Sstevel@tonic-gate 12000Sstevel@tonic-gate ASSERT(connp); 12010Sstevel@tonic-gate 12020Sstevel@tonic-gate connp->conn_ulp = protocol; 12030Sstevel@tonic-gate connp->conn_srcv6 = *src; 12040Sstevel@tonic-gate connp->conn_lport = lport; 12050Sstevel@tonic-gate 12060Sstevel@tonic-gate switch (protocol) { 12071676Sjpk default: 12083448Sdh155122 if (is_system_labeled() && 12093448Sdh155122 check_exempt_conflict_v6(connp, ipst)) 12101676Sjpk return (EADDRINUSE); 12111676Sjpk /* FALLTHROUGH */ 12120Sstevel@tonic-gate case IPPROTO_UDP: 12130Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 12140Sstevel@tonic-gate IPCL_DEBUG_LVL(128, 12150Sstevel@tonic-gate ("ipcl_bind_insert_v6: connp %p - udp\n", 12160Sstevel@tonic-gate (void *)connp)); 12173448Sdh155122 connfp = &ipst->ips_ipcl_udp_fanout[ 12183448Sdh155122 IPCL_UDP_HASH(lport, ipst)]; 12190Sstevel@tonic-gate } else { 12200Sstevel@tonic-gate IPCL_DEBUG_LVL(128, 12210Sstevel@tonic-gate ("ipcl_bind_insert_v6: connp %p - protocol\n", 12220Sstevel@tonic-gate (void *)connp)); 12233448Sdh155122 connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; 12240Sstevel@tonic-gate } 12250Sstevel@tonic-gate 12260Sstevel@tonic-gate if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) { 12270Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 12280Sstevel@tonic-gate } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 12290Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 12300Sstevel@tonic-gate } else { 12310Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 12320Sstevel@tonic-gate } 12330Sstevel@tonic-gate break; 12340Sstevel@tonic-gate 12350Sstevel@tonic-gate case IPPROTO_TCP: 12360Sstevel@tonic-gate /* XXX - Need a 
separate table for IN6_IS_ADDR_UNSPECIFIED? */ 12370Sstevel@tonic-gate 12380Sstevel@tonic-gate /* Insert it in the Bind Hash */ 12391676Sjpk ASSERT(connp->conn_zoneid != ALL_ZONES); 12403448Sdh155122 connfp = &ipst->ips_ipcl_bind_fanout[ 12413448Sdh155122 IPCL_BIND_HASH(lport, ipst)]; 12420Sstevel@tonic-gate if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 12430Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 12440Sstevel@tonic-gate } else { 12450Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 12460Sstevel@tonic-gate } 12470Sstevel@tonic-gate if (cl_inet_listen != NULL) { 12480Sstevel@tonic-gate sa_family_t addr_family; 12490Sstevel@tonic-gate uint8_t *laddrp; 12500Sstevel@tonic-gate 12510Sstevel@tonic-gate if (connp->conn_pkt_isv6) { 12520Sstevel@tonic-gate addr_family = AF_INET6; 12530Sstevel@tonic-gate laddrp = 12540Sstevel@tonic-gate (uint8_t *)&connp->conn_bound_source_v6; 12550Sstevel@tonic-gate } else { 12560Sstevel@tonic-gate addr_family = AF_INET; 12570Sstevel@tonic-gate laddrp = (uint8_t *)&connp->conn_bound_source; 12580Sstevel@tonic-gate } 12590Sstevel@tonic-gate connp->conn_flags |= IPCL_CL_LISTENER; 12600Sstevel@tonic-gate (*cl_inet_listen)(IPPROTO_TCP, addr_family, laddrp, 12610Sstevel@tonic-gate lport); 12620Sstevel@tonic-gate } 12630Sstevel@tonic-gate break; 12640Sstevel@tonic-gate 12650Sstevel@tonic-gate case IPPROTO_SCTP: 12660Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 12670Sstevel@tonic-gate break; 12680Sstevel@tonic-gate } 12690Sstevel@tonic-gate 12700Sstevel@tonic-gate return (ret); 12710Sstevel@tonic-gate } 12720Sstevel@tonic-gate 12730Sstevel@tonic-gate /* 12740Sstevel@tonic-gate * ipcl_conn_hash insertion routines. 
*/
/*
 * Insert connp in the fanout appropriate for a (possibly) connected v4
 * endpoint:
 *   TCP   - connection fanout hashed on remote address and ports; fails
 *           with EADDRINUSE if an entry matching IPCL_CONN_MATCH() is
 *           already present;
 *   SCTP  - raw fanout via ipcl_sctp_hash_insert();
 *   UDP   - UDP fanout hashed on the local port;
 *   other - v4 per-protocol fanout, after the MAC exemption check on
 *           labeled systems.
 * Returns 0 on success, EADDRINUSE as described above, or whatever
 * ipcl_sctp_hash_insert() returns.
 */
int
ipcl_conn_insert(conn_t *connp, uint8_t protocol, ipaddr_t src,
    ipaddr_t rem, uint32_t ports)
{
	connf_t		*connfp;
	uint16_t	*up;
	conn_t		*tconnp;
#ifdef	IPCL_DEBUG
	char	sbuf[INET_NTOA_BUFSIZE], rbuf[INET_NTOA_BUFSIZE];
#endif
	in_port_t	lport;
	int		ret = 0;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;

	IPCL_DEBUG_LVL(256, ("ipcl_conn_insert: connp %p, src = %s, "
	    "dst = %s, ports = %x, protocol = %x", (void *)connp,
	    inet_ntoa_r(src, sbuf), inet_ntoa_r(rem, rbuf),
	    ports, protocol));

	switch (protocol) {
	case IPPROTO_TCP:
		if (!(connp->conn_flags & IPCL_EAGER)) {
			/*
			 * For an eager connection, i.e. a connection which
			 * has just been created, the initialization is
			 * already done in ip at conn creation time, so
			 * only non-eager conns are initialized here.
			 */
			IPCL_CONN_INIT(connp, protocol, src, rem, ports);
		}
		connfp = &ipst->ips_ipcl_conn_fanout[
		    IPCL_CONN_HASH(connp->conn_rem,
		    connp->conn_ports, ipst)];
		mutex_enter(&connfp->connf_lock);
		for (tconnp = connfp->connf_head; tconnp != NULL;
		    tconnp = tconnp->conn_next) {
			if (IPCL_CONN_MATCH(tconnp, connp->conn_ulp,
			    connp->conn_rem, connp->conn_src,
			    connp->conn_ports)) {

				/* Already have a conn. bail out */
				mutex_exit(&connfp->connf_lock);
				return (EADDRINUSE);
			}
		}
		if (connp->conn_fanout != NULL) {
			/*
			 * Probably a XTI/TLI application trying to do a
			 * rebind. Let it happen.
			 *
			 * IPCL_HASH_REMOVE() takes the lock of the fanout
			 * the conn is currently on, so connf_lock is
			 * dropped around it.
			 * NOTE(review): the duplicate scan above is not
			 * repeated after the lock is re-acquired - confirm
			 * nothing can insert a matching conn in that window.
			 */
			mutex_exit(&connfp->connf_lock);
			IPCL_HASH_REMOVE(connp);
			mutex_enter(&connfp->connf_lock);
		}

		ASSERT(connp->conn_recv != NULL);

		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
		mutex_exit(&connfp->connf_lock);
		break;

	case IPPROTO_SCTP:
		/*
		 * The raw socket may have already been bound, remove it
		 * from the hash first.
		 */
		IPCL_HASH_REMOVE(connp);
		/*
		 * The local port is the low 16 bits of ports once converted
		 * to host order; ipcl_sctp_hash_insert() wants it back in
		 * network byte order.
		 */
		lport = htons((uint16_t)(ntohl(ports) & 0xFFFF));
		ret = ipcl_sctp_hash_insert(connp, lport);
		break;

	default:
		/*
		 * Check for conflicts among MAC exempt bindings.  For
		 * transports with port numbers, this is done by the upper
		 * level per-transport binding logic.  For all others, it's
		 * done here.
		 *
		 * NOTE(review): check_exempt_conflict_v4() reads
		 * connp->conn_ulp and connp->conn_src, but on this path
		 * IPCL_CONN_INIT() only runs after the check.  Presumably
		 * the conn was already set up by an earlier bind - confirm.
		 */
		if (is_system_labeled() &&
		    check_exempt_conflict_v4(connp, ipst))
			return (EADDRINUSE);
		/* FALLTHROUGH */

	case IPPROTO_UDP:
		/*
		 * up[1] is the second 16-bit half of ports as laid out in
		 * memory; presumably the local port - confirm against
		 * IPCL_CONN_INIT().
		 */
		up = (uint16_t *)&ports;
		IPCL_CONN_INIT(connp, protocol, src, rem, ports);
		if (protocol == IPPROTO_UDP) {
			connfp = &ipst->ips_ipcl_udp_fanout[
			    IPCL_UDP_HASH(up[1], ipst)];
		} else {
			connfp = &ipst->ips_ipcl_proto_fanout[protocol];
		}

		if (connp->conn_rem != INADDR_ANY) {
			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
		} else if (connp->conn_src != INADDR_ANY) {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		} else {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		}
		break;
	}

	return (ret);
}

int
ipcl_conn_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src,
    const in6_addr_t *rem, uint32_t ports, uint_t ifindex)
{
connf_t *connfp; 13870Sstevel@tonic-gate uint16_t *up; 13880Sstevel@tonic-gate conn_t *tconnp; 13890Sstevel@tonic-gate in_port_t lport; 13900Sstevel@tonic-gate int ret = 0; 13913448Sdh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 13920Sstevel@tonic-gate 13930Sstevel@tonic-gate switch (protocol) { 13940Sstevel@tonic-gate case IPPROTO_TCP: 13950Sstevel@tonic-gate /* Just need to insert a conn struct */ 13960Sstevel@tonic-gate if (!(connp->conn_flags & IPCL_EAGER)) { 13970Sstevel@tonic-gate IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports); 13980Sstevel@tonic-gate } 13993448Sdh155122 connfp = &ipst->ips_ipcl_conn_fanout[ 14003448Sdh155122 IPCL_CONN_HASH_V6(connp->conn_remv6, connp->conn_ports, 14013448Sdh155122 ipst)]; 14020Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 14030Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 14040Sstevel@tonic-gate tconnp = tconnp->conn_next) { 14050Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_ulp, 14060Sstevel@tonic-gate connp->conn_remv6, connp->conn_srcv6, 14070Sstevel@tonic-gate connp->conn_ports) && 14080Sstevel@tonic-gate (tconnp->conn_tcp->tcp_bound_if == 0 || 14090Sstevel@tonic-gate tconnp->conn_tcp->tcp_bound_if == ifindex)) { 14100Sstevel@tonic-gate /* Already have a conn. bail out */ 14110Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 14120Sstevel@tonic-gate return (EADDRINUSE); 14130Sstevel@tonic-gate } 14140Sstevel@tonic-gate } 14150Sstevel@tonic-gate if (connp->conn_fanout != NULL) { 14160Sstevel@tonic-gate /* 14170Sstevel@tonic-gate * Probably a XTI/TLI application trying to do a 14180Sstevel@tonic-gate * rebind. Let it happen. 
14190Sstevel@tonic-gate */ 14200Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 14210Sstevel@tonic-gate IPCL_HASH_REMOVE(connp); 14220Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 14230Sstevel@tonic-gate } 14240Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 14250Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 14260Sstevel@tonic-gate break; 14270Sstevel@tonic-gate 14280Sstevel@tonic-gate case IPPROTO_SCTP: 1429409Skcpoon IPCL_HASH_REMOVE(connp); 1430409Skcpoon lport = htons((uint16_t)(ntohl(ports) & 0xFFFF)); 14310Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 14320Sstevel@tonic-gate break; 14330Sstevel@tonic-gate 14341676Sjpk default: 14353448Sdh155122 if (is_system_labeled() && 14363448Sdh155122 check_exempt_conflict_v6(connp, ipst)) 14371676Sjpk return (EADDRINUSE); 14381676Sjpk /* FALLTHROUGH */ 14390Sstevel@tonic-gate case IPPROTO_UDP: 14400Sstevel@tonic-gate up = (uint16_t *)&ports; 14410Sstevel@tonic-gate IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports); 14420Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 14433448Sdh155122 connfp = &ipst->ips_ipcl_udp_fanout[ 14443448Sdh155122 IPCL_UDP_HASH(up[1], ipst)]; 14450Sstevel@tonic-gate } else { 14463448Sdh155122 connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; 14470Sstevel@tonic-gate } 14480Sstevel@tonic-gate 14490Sstevel@tonic-gate if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) { 14500Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 14510Sstevel@tonic-gate } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 14520Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 14530Sstevel@tonic-gate } else { 14540Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 14550Sstevel@tonic-gate } 14560Sstevel@tonic-gate break; 14570Sstevel@tonic-gate } 14580Sstevel@tonic-gate 14590Sstevel@tonic-gate return (ret); 14600Sstevel@tonic-gate } 14610Sstevel@tonic-gate 14620Sstevel@tonic-gate /* 14630Sstevel@tonic-gate * v4 packet classifying 
function. looks up the fanout table to 14640Sstevel@tonic-gate * find the conn, the packet belongs to. returns the conn with 14650Sstevel@tonic-gate * the reference held, null otherwise. 14661676Sjpk * 14671676Sjpk * If zoneid is ALL_ZONES, then the search rules described in the "Connection 14681676Sjpk * Lookup" comment block are applied. Labels are also checked as described 14691676Sjpk * above. If the packet is from the inside (looped back), and is from the same 14701676Sjpk * zone, then label checks are omitted. 14710Sstevel@tonic-gate */ 14720Sstevel@tonic-gate conn_t * 14733448Sdh155122 ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid, 14743448Sdh155122 ip_stack_t *ipst) 14750Sstevel@tonic-gate { 14760Sstevel@tonic-gate ipha_t *ipha; 14770Sstevel@tonic-gate connf_t *connfp, *bind_connfp; 14780Sstevel@tonic-gate uint16_t lport; 14790Sstevel@tonic-gate uint16_t fport; 14800Sstevel@tonic-gate uint32_t ports; 14810Sstevel@tonic-gate conn_t *connp; 14820Sstevel@tonic-gate uint16_t *up; 14831676Sjpk boolean_t shared_addr; 14841676Sjpk boolean_t unlabeled; 14850Sstevel@tonic-gate 14860Sstevel@tonic-gate ipha = (ipha_t *)mp->b_rptr; 14870Sstevel@tonic-gate up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET); 14880Sstevel@tonic-gate 14890Sstevel@tonic-gate switch (protocol) { 14900Sstevel@tonic-gate case IPPROTO_TCP: 14910Sstevel@tonic-gate ports = *(uint32_t *)up; 14920Sstevel@tonic-gate connfp = 14933448Sdh155122 &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src, 14943448Sdh155122 ports, ipst)]; 14950Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 14960Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 14970Sstevel@tonic-gate connp = connp->conn_next) { 14980Sstevel@tonic-gate if (IPCL_CONN_MATCH(connp, protocol, 14990Sstevel@tonic-gate ipha->ipha_src, ipha->ipha_dst, ports)) 15000Sstevel@tonic-gate break; 15010Sstevel@tonic-gate } 15020Sstevel@tonic-gate 15030Sstevel@tonic-gate if (connp != 
NULL) { 15041676Sjpk /* 15051676Sjpk * We have a fully-bound TCP connection. 15061676Sjpk * 15071676Sjpk * For labeled systems, there's no need to check the 15081676Sjpk * label here. It's known to be good as we checked 15091676Sjpk * before allowing the connection to become bound. 15101676Sjpk */ 15110Sstevel@tonic-gate CONN_INC_REF(connp); 15120Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 15130Sstevel@tonic-gate return (connp); 15140Sstevel@tonic-gate } 15150Sstevel@tonic-gate 15160Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 15170Sstevel@tonic-gate 15180Sstevel@tonic-gate lport = up[1]; 15191676Sjpk unlabeled = B_FALSE; 15201676Sjpk /* Cred cannot be null on IPv4 */ 15211676Sjpk if (is_system_labeled()) 15221676Sjpk unlabeled = (crgetlabel(DB_CRED(mp))->tsl_flags & 15231676Sjpk TSLF_UNLABELED) != 0; 15241676Sjpk shared_addr = (zoneid == ALL_ZONES); 15251676Sjpk if (shared_addr) { 15263448Sdh155122 /* 15273448Sdh155122 * No need to handle exclusive-stack zones since 15283448Sdh155122 * ALL_ZONES only applies to the shared stack. 15293448Sdh155122 */ 15301676Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 15311676Sjpk /* 15321676Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 15331676Sjpk * ALL_ZONES. In that case, we assume it's SLP, and 15341676Sjpk * search for the zone based on the packet label. 15351676Sjpk * 15361676Sjpk * If there is such a zone, we prefer to find a 15371676Sjpk * connection in it. Otherwise, we look for a 15381676Sjpk * MAC-exempt connection in any zone whose label 15391676Sjpk * dominates the default label on the packet. 
15401676Sjpk */ 15411676Sjpk if (zoneid == ALL_ZONES) 15421676Sjpk zoneid = tsol_packet_to_zoneid(mp); 15431676Sjpk else 15441676Sjpk unlabeled = B_FALSE; 15451676Sjpk } 15461676Sjpk 15473448Sdh155122 bind_connfp = 15483448Sdh155122 &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 15490Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 15500Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 15510Sstevel@tonic-gate connp = connp->conn_next) { 15521676Sjpk if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst, 15532263Ssommerfe lport) && (IPCL_ZONE_MATCH(connp, zoneid) || 15541676Sjpk (unlabeled && connp->conn_mac_exempt))) 15550Sstevel@tonic-gate break; 15560Sstevel@tonic-gate } 15570Sstevel@tonic-gate 15581676Sjpk /* 15591676Sjpk * If the matching connection is SLP on a private address, then 15601676Sjpk * the label on the packet must match the local zone's label. 15611676Sjpk * Otherwise, it must be in the label range defined by tnrh. 15621676Sjpk * This is ensured by tsol_receive_label. 
15631676Sjpk */ 15641676Sjpk if (connp != NULL && is_system_labeled() && 15651676Sjpk !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION, 15661676Sjpk shared_addr, connp)) { 15671676Sjpk DTRACE_PROBE3( 15681676Sjpk tx__ip__log__info__classify__tcp, 15691676Sjpk char *, 15701676Sjpk "connp(1) could not receive mp(2)", 15711676Sjpk conn_t *, connp, mblk_t *, mp); 15721676Sjpk connp = NULL; 15731676Sjpk } 15741676Sjpk 15750Sstevel@tonic-gate if (connp != NULL) { 15761676Sjpk /* Have a listener at least */ 15770Sstevel@tonic-gate CONN_INC_REF(connp); 15780Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 15790Sstevel@tonic-gate return (connp); 15800Sstevel@tonic-gate } 15810Sstevel@tonic-gate 15820Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 15830Sstevel@tonic-gate 15840Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 15850Sstevel@tonic-gate ("ipcl_classify: couldn't classify mp = %p\n", 15860Sstevel@tonic-gate (void *)mp)); 15870Sstevel@tonic-gate break; 15880Sstevel@tonic-gate 15890Sstevel@tonic-gate case IPPROTO_UDP: 15900Sstevel@tonic-gate lport = up[1]; 15911676Sjpk unlabeled = B_FALSE; 15921676Sjpk /* Cred cannot be null on IPv4 */ 15931676Sjpk if (is_system_labeled()) 15941676Sjpk unlabeled = (crgetlabel(DB_CRED(mp))->tsl_flags & 15951676Sjpk TSLF_UNLABELED) != 0; 15961676Sjpk shared_addr = (zoneid == ALL_ZONES); 15971676Sjpk if (shared_addr) { 15983448Sdh155122 /* 15993448Sdh155122 * No need to handle exclusive-stack zones since 16003448Sdh155122 * ALL_ZONES only applies to the shared stack. 16013448Sdh155122 */ 16021676Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 16031676Sjpk /* 16041676Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 16051676Sjpk * ALL_ZONES. In that case, we assume it's SLP, and 16061676Sjpk * search for the zone based on the packet label. 16071676Sjpk * 16081676Sjpk * If there is such a zone, we prefer to find a 16091676Sjpk * connection in it. 
Otherwise, we look for a 16101676Sjpk * MAC-exempt connection in any zone whose label 16111676Sjpk * dominates the default label on the packet. 16121676Sjpk */ 16131676Sjpk if (zoneid == ALL_ZONES) 16141676Sjpk zoneid = tsol_packet_to_zoneid(mp); 16151676Sjpk else 16161676Sjpk unlabeled = B_FALSE; 16171676Sjpk } 16180Sstevel@tonic-gate fport = up[0]; 16190Sstevel@tonic-gate IPCL_DEBUG_LVL(512, ("ipcl_udp_classify %x %x", lport, fport)); 16203448Sdh155122 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; 16210Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 16220Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 16230Sstevel@tonic-gate connp = connp->conn_next) { 16240Sstevel@tonic-gate if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst, 16250Sstevel@tonic-gate fport, ipha->ipha_src) && 16262263Ssommerfe (IPCL_ZONE_MATCH(connp, zoneid) || 16271676Sjpk (unlabeled && connp->conn_mac_exempt))) 16280Sstevel@tonic-gate break; 16290Sstevel@tonic-gate } 16300Sstevel@tonic-gate 16311676Sjpk if (connp != NULL && is_system_labeled() && 16321676Sjpk !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION, 16331676Sjpk shared_addr, connp)) { 16341676Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__udp, 16351676Sjpk char *, "connp(1) could not receive mp(2)", 16361676Sjpk conn_t *, connp, mblk_t *, mp); 16371676Sjpk connp = NULL; 16381676Sjpk } 16391676Sjpk 16400Sstevel@tonic-gate if (connp != NULL) { 16410Sstevel@tonic-gate CONN_INC_REF(connp); 16420Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 16430Sstevel@tonic-gate return (connp); 16440Sstevel@tonic-gate } 16450Sstevel@tonic-gate 16460Sstevel@tonic-gate /* 16470Sstevel@tonic-gate * We shouldn't come here for multicast/broadcast packets 16480Sstevel@tonic-gate */ 16490Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 16500Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 16510Sstevel@tonic-gate ("ipcl_classify: cant find udp conn_t for ports : %x %x", 16520Sstevel@tonic-gate lport, fport)); 
16530Sstevel@tonic-gate break; 16540Sstevel@tonic-gate } 16550Sstevel@tonic-gate 16560Sstevel@tonic-gate return (NULL); 16570Sstevel@tonic-gate } 16580Sstevel@tonic-gate 16590Sstevel@tonic-gate conn_t * 16603448Sdh155122 ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid, 16613448Sdh155122 ip_stack_t *ipst) 16620Sstevel@tonic-gate { 16630Sstevel@tonic-gate ip6_t *ip6h; 16640Sstevel@tonic-gate connf_t *connfp, *bind_connfp; 16650Sstevel@tonic-gate uint16_t lport; 16660Sstevel@tonic-gate uint16_t fport; 16670Sstevel@tonic-gate tcph_t *tcph; 16680Sstevel@tonic-gate uint32_t ports; 16690Sstevel@tonic-gate conn_t *connp; 16700Sstevel@tonic-gate uint16_t *up; 16711676Sjpk boolean_t shared_addr; 16721676Sjpk boolean_t unlabeled; 16730Sstevel@tonic-gate 16740Sstevel@tonic-gate ip6h = (ip6_t *)mp->b_rptr; 16750Sstevel@tonic-gate 16760Sstevel@tonic-gate switch (protocol) { 16770Sstevel@tonic-gate case IPPROTO_TCP: 16780Sstevel@tonic-gate tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 16790Sstevel@tonic-gate up = (uint16_t *)tcph->th_lport; 16800Sstevel@tonic-gate ports = *(uint32_t *)up; 16810Sstevel@tonic-gate 16820Sstevel@tonic-gate connfp = 16833448Sdh155122 &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src, 16843448Sdh155122 ports, ipst)]; 16850Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 16860Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 16870Sstevel@tonic-gate connp = connp->conn_next) { 16880Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(connp, protocol, 16890Sstevel@tonic-gate ip6h->ip6_src, ip6h->ip6_dst, ports)) 16900Sstevel@tonic-gate break; 16910Sstevel@tonic-gate } 16920Sstevel@tonic-gate 16930Sstevel@tonic-gate if (connp != NULL) { 16941676Sjpk /* 16951676Sjpk * We have a fully-bound TCP connection. 16961676Sjpk * 16971676Sjpk * For labeled systems, there's no need to check the 16981676Sjpk * label here. It's known to be good as we checked 16991676Sjpk * before allowing the connection to become bound. 
17001676Sjpk */ 17010Sstevel@tonic-gate CONN_INC_REF(connp); 17020Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 17030Sstevel@tonic-gate return (connp); 17040Sstevel@tonic-gate } 17050Sstevel@tonic-gate 17060Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 17070Sstevel@tonic-gate 17080Sstevel@tonic-gate lport = up[1]; 17091676Sjpk unlabeled = B_FALSE; 17101676Sjpk /* Cred can be null on IPv6 */ 17111676Sjpk if (is_system_labeled()) { 17121676Sjpk cred_t *cr = DB_CRED(mp); 17131676Sjpk 17141676Sjpk unlabeled = (cr != NULL && 17151676Sjpk crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0; 17161676Sjpk } 17171676Sjpk shared_addr = (zoneid == ALL_ZONES); 17181676Sjpk if (shared_addr) { 17193448Sdh155122 /* 17203448Sdh155122 * No need to handle exclusive-stack zones since 17213448Sdh155122 * ALL_ZONES only applies to the shared stack. 17223448Sdh155122 */ 17231676Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 17241676Sjpk /* 17251676Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 17261676Sjpk * ALL_ZONES. In that case, we assume it's SLP, and 17271676Sjpk * search for the zone based on the packet label. 17281676Sjpk * 17291676Sjpk * If there is such a zone, we prefer to find a 17301676Sjpk * connection in it. Otherwise, we look for a 17311676Sjpk * MAC-exempt connection in any zone whose label 17321676Sjpk * dominates the default label on the packet. 
17331676Sjpk */ 17341676Sjpk if (zoneid == ALL_ZONES) 17351676Sjpk zoneid = tsol_packet_to_zoneid(mp); 17361676Sjpk else 17371676Sjpk unlabeled = B_FALSE; 17381676Sjpk } 17391676Sjpk 17403448Sdh155122 bind_connfp = 17413448Sdh155122 &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 17420Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 17430Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 17440Sstevel@tonic-gate connp = connp->conn_next) { 17450Sstevel@tonic-gate if (IPCL_BIND_MATCH_V6(connp, protocol, 17460Sstevel@tonic-gate ip6h->ip6_dst, lport) && 17472263Ssommerfe (IPCL_ZONE_MATCH(connp, zoneid) || 17481676Sjpk (unlabeled && connp->conn_mac_exempt))) 17490Sstevel@tonic-gate break; 17500Sstevel@tonic-gate } 17510Sstevel@tonic-gate 17521676Sjpk if (connp != NULL && is_system_labeled() && 17531676Sjpk !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION, 17541676Sjpk shared_addr, connp)) { 17551676Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__tcp6, 17561676Sjpk char *, "connp(1) could not receive mp(2)", 17571676Sjpk conn_t *, connp, mblk_t *, mp); 17581676Sjpk connp = NULL; 17591676Sjpk } 17601676Sjpk 17610Sstevel@tonic-gate if (connp != NULL) { 17620Sstevel@tonic-gate /* Have a listner at least */ 17630Sstevel@tonic-gate CONN_INC_REF(connp); 17640Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 17650Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 17660Sstevel@tonic-gate ("ipcl_classify_v6: found listner " 17670Sstevel@tonic-gate "connp = %p\n", (void *)connp)); 17680Sstevel@tonic-gate 17690Sstevel@tonic-gate return (connp); 17700Sstevel@tonic-gate } 17710Sstevel@tonic-gate 17720Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 17730Sstevel@tonic-gate 17740Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 17750Sstevel@tonic-gate ("ipcl_classify_v6: couldn't classify mp = %p\n", 17760Sstevel@tonic-gate (void *)mp)); 17770Sstevel@tonic-gate break; 17780Sstevel@tonic-gate 17790Sstevel@tonic-gate case IPPROTO_UDP: 
17800Sstevel@tonic-gate up = (uint16_t *)&mp->b_rptr[hdr_len]; 17810Sstevel@tonic-gate lport = up[1]; 17821676Sjpk unlabeled = B_FALSE; 17831676Sjpk /* Cred can be null on IPv6 */ 17841676Sjpk if (is_system_labeled()) { 17851676Sjpk cred_t *cr = DB_CRED(mp); 17861676Sjpk 17871676Sjpk unlabeled = (cr != NULL && 17881676Sjpk crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0; 17891676Sjpk } 17901676Sjpk shared_addr = (zoneid == ALL_ZONES); 17911676Sjpk if (shared_addr) { 17923448Sdh155122 /* 17933448Sdh155122 * No need to handle exclusive-stack zones since 17943448Sdh155122 * ALL_ZONES only applies to the shared stack. 17953448Sdh155122 */ 17961676Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 17971676Sjpk /* 17981676Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 17991676Sjpk * ALL_ZONES. In that case, we assume it's SLP, and 18001676Sjpk * search for the zone based on the packet label. 18011676Sjpk * 18021676Sjpk * If there is such a zone, we prefer to find a 18031676Sjpk * connection in it. Otherwise, we look for a 18041676Sjpk * MAC-exempt connection in any zone whose label 18051676Sjpk * dominates the default label on the packet. 
18061676Sjpk */ 18071676Sjpk if (zoneid == ALL_ZONES) 18081676Sjpk zoneid = tsol_packet_to_zoneid(mp); 18091676Sjpk else 18101676Sjpk unlabeled = B_FALSE; 18111676Sjpk } 18121676Sjpk 18130Sstevel@tonic-gate fport = up[0]; 18140Sstevel@tonic-gate IPCL_DEBUG_LVL(512, ("ipcl_udp_classify_v6 %x %x", lport, 18150Sstevel@tonic-gate fport)); 18163448Sdh155122 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; 18170Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 18180Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 18190Sstevel@tonic-gate connp = connp->conn_next) { 18200Sstevel@tonic-gate if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst, 18210Sstevel@tonic-gate fport, ip6h->ip6_src) && 18222263Ssommerfe (IPCL_ZONE_MATCH(connp, zoneid) || 18231676Sjpk (unlabeled && connp->conn_mac_exempt))) 18240Sstevel@tonic-gate break; 18250Sstevel@tonic-gate } 18260Sstevel@tonic-gate 18271676Sjpk if (connp != NULL && is_system_labeled() && 18281676Sjpk !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION, 18291676Sjpk shared_addr, connp)) { 18301676Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__udp6, 18311676Sjpk char *, "connp(1) could not receive mp(2)", 18321676Sjpk conn_t *, connp, mblk_t *, mp); 18331676Sjpk connp = NULL; 18341676Sjpk } 18351676Sjpk 18360Sstevel@tonic-gate if (connp != NULL) { 18370Sstevel@tonic-gate CONN_INC_REF(connp); 18380Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 18390Sstevel@tonic-gate return (connp); 18400Sstevel@tonic-gate } 18410Sstevel@tonic-gate 18420Sstevel@tonic-gate /* 18430Sstevel@tonic-gate * We shouldn't come here for multicast/broadcast packets 18440Sstevel@tonic-gate */ 18450Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 18460Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 18470Sstevel@tonic-gate ("ipcl_classify_v6: cant find udp conn_t for ports : %x %x", 18480Sstevel@tonic-gate lport, fport)); 18490Sstevel@tonic-gate break; 18500Sstevel@tonic-gate } 18510Sstevel@tonic-gate 18520Sstevel@tonic-gate 
return (NULL); 18530Sstevel@tonic-gate } 18540Sstevel@tonic-gate 18550Sstevel@tonic-gate /* 18560Sstevel@tonic-gate * wrapper around ipcl_classify_(v4,v6) routines. 18570Sstevel@tonic-gate */ 18580Sstevel@tonic-gate conn_t * 18593448Sdh155122 ipcl_classify(mblk_t *mp, zoneid_t zoneid, ip_stack_t *ipst) 18600Sstevel@tonic-gate { 18610Sstevel@tonic-gate uint16_t hdr_len; 18620Sstevel@tonic-gate ipha_t *ipha; 18630Sstevel@tonic-gate uint8_t *nexthdrp; 18640Sstevel@tonic-gate 18650Sstevel@tonic-gate if (MBLKL(mp) < sizeof (ipha_t)) 18660Sstevel@tonic-gate return (NULL); 18670Sstevel@tonic-gate 18680Sstevel@tonic-gate switch (IPH_HDR_VERSION(mp->b_rptr)) { 18690Sstevel@tonic-gate case IPV4_VERSION: 18700Sstevel@tonic-gate ipha = (ipha_t *)mp->b_rptr; 18710Sstevel@tonic-gate hdr_len = IPH_HDR_LENGTH(ipha); 18720Sstevel@tonic-gate return (ipcl_classify_v4(mp, ipha->ipha_protocol, hdr_len, 18733448Sdh155122 zoneid, ipst)); 18740Sstevel@tonic-gate case IPV6_VERSION: 18750Sstevel@tonic-gate if (!ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr, 18760Sstevel@tonic-gate &hdr_len, &nexthdrp)) 18770Sstevel@tonic-gate return (NULL); 18780Sstevel@tonic-gate 18793448Sdh155122 return (ipcl_classify_v6(mp, *nexthdrp, hdr_len, zoneid, ipst)); 18800Sstevel@tonic-gate } 18810Sstevel@tonic-gate 18820Sstevel@tonic-gate return (NULL); 18830Sstevel@tonic-gate } 18840Sstevel@tonic-gate 18850Sstevel@tonic-gate conn_t * 18861676Sjpk ipcl_classify_raw(mblk_t *mp, uint8_t protocol, zoneid_t zoneid, 18873448Sdh155122 uint32_t ports, ipha_t *hdr, ip_stack_t *ipst) 18880Sstevel@tonic-gate { 18891676Sjpk connf_t *connfp; 18900Sstevel@tonic-gate conn_t *connp; 18910Sstevel@tonic-gate in_port_t lport; 18920Sstevel@tonic-gate int af; 18931676Sjpk boolean_t shared_addr; 18941676Sjpk boolean_t unlabeled; 18951676Sjpk const void *dst; 18960Sstevel@tonic-gate 18970Sstevel@tonic-gate lport = ((uint16_t *)&ports)[1]; 18981676Sjpk 18991676Sjpk unlabeled = B_FALSE; 19001676Sjpk /* Cred can be null on IPv6 */ 
19011676Sjpk if (is_system_labeled()) { 19021676Sjpk cred_t *cr = DB_CRED(mp); 19031676Sjpk 19041676Sjpk unlabeled = (cr != NULL && 19051676Sjpk crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0; 19061676Sjpk } 19071676Sjpk shared_addr = (zoneid == ALL_ZONES); 19081676Sjpk if (shared_addr) { 19093448Sdh155122 /* 19103448Sdh155122 * No need to handle exclusive-stack zones since ALL_ZONES 19113448Sdh155122 * only applies to the shared stack. 19123448Sdh155122 */ 19131676Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 19141676Sjpk /* 19151676Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 19161676Sjpk * ALL_ZONES. In that case, we assume it's SLP, and search for 19171676Sjpk * the zone based on the packet label. 19181676Sjpk * 19191676Sjpk * If there is such a zone, we prefer to find a connection in 19201676Sjpk * it. Otherwise, we look for a MAC-exempt connection in any 19211676Sjpk * zone whose label dominates the default label on the packet. 19221676Sjpk */ 19231676Sjpk if (zoneid == ALL_ZONES) 19241676Sjpk zoneid = tsol_packet_to_zoneid(mp); 19251676Sjpk else 19261676Sjpk unlabeled = B_FALSE; 19271676Sjpk } 19281676Sjpk 19290Sstevel@tonic-gate af = IPH_HDR_VERSION(hdr); 19301676Sjpk dst = af == IPV4_VERSION ? (const void *)&hdr->ipha_dst : 19311676Sjpk (const void *)&((ip6_t *)hdr)->ip6_dst; 19323448Sdh155122 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)]; 19330Sstevel@tonic-gate 19340Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 19350Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 19360Sstevel@tonic-gate connp = connp->conn_next) { 19370Sstevel@tonic-gate /* We don't allow v4 fallback for v6 raw socket. */ 19381676Sjpk if (af == (connp->conn_af_isv6 ? 
IPV4_VERSION : 19391676Sjpk IPV6_VERSION)) 19400Sstevel@tonic-gate continue; 19410Sstevel@tonic-gate if (connp->conn_fully_bound) { 19420Sstevel@tonic-gate if (af == IPV4_VERSION) { 19431676Sjpk if (!IPCL_CONN_MATCH(connp, protocol, 19441676Sjpk hdr->ipha_src, hdr->ipha_dst, ports)) 19451676Sjpk continue; 19460Sstevel@tonic-gate } else { 19471676Sjpk if (!IPCL_CONN_MATCH_V6(connp, protocol, 19480Sstevel@tonic-gate ((ip6_t *)hdr)->ip6_src, 19491676Sjpk ((ip6_t *)hdr)->ip6_dst, ports)) 19501676Sjpk continue; 19510Sstevel@tonic-gate } 19520Sstevel@tonic-gate } else { 19530Sstevel@tonic-gate if (af == IPV4_VERSION) { 19541676Sjpk if (!IPCL_BIND_MATCH(connp, protocol, 19551676Sjpk hdr->ipha_dst, lport)) 19561676Sjpk continue; 19570Sstevel@tonic-gate } else { 19581676Sjpk if (!IPCL_BIND_MATCH_V6(connp, protocol, 19591676Sjpk ((ip6_t *)hdr)->ip6_dst, lport)) 19601676Sjpk continue; 19610Sstevel@tonic-gate } 19620Sstevel@tonic-gate } 19631676Sjpk 19642263Ssommerfe if (IPCL_ZONE_MATCH(connp, zoneid) || 19651676Sjpk (unlabeled && connp->conn_mac_exempt)) 19661676Sjpk break; 19671676Sjpk } 19681676Sjpk /* 19691676Sjpk * If the connection is fully-bound and connection-oriented (TCP or 19701676Sjpk * SCTP), then we've already validated the remote system's label. 19711676Sjpk * There's no need to do it again for every packet. 19721676Sjpk */ 19731676Sjpk if (connp != NULL && is_system_labeled() && (!connp->conn_fully_bound || 19741676Sjpk !(connp->conn_flags & (IPCL_TCP|IPCL_SCTPCONN))) && 19751676Sjpk !tsol_receive_local(mp, dst, af, shared_addr, connp)) { 19761676Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__rawip, 19771676Sjpk char *, "connp(1) could not receive mp(2)", 19781676Sjpk conn_t *, connp, mblk_t *, mp); 19791676Sjpk connp = NULL; 19800Sstevel@tonic-gate } 1981409Skcpoon 1982409Skcpoon if (connp != NULL) 1983409Skcpoon goto found; 1984409Skcpoon mutex_exit(&connfp->connf_lock); 1985409Skcpoon 1986409Skcpoon /* Try to look for a wildcard match. 
*/ 19873448Sdh155122 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)]; 1988409Skcpoon mutex_enter(&connfp->connf_lock); 1989409Skcpoon for (connp = connfp->connf_head; connp != NULL; 1990409Skcpoon connp = connp->conn_next) { 1991409Skcpoon /* We don't allow v4 fallback for v6 raw socket. */ 1992409Skcpoon if ((af == (connp->conn_af_isv6 ? IPV4_VERSION : 19932263Ssommerfe IPV6_VERSION)) || !IPCL_ZONE_MATCH(connp, zoneid)) { 1994409Skcpoon continue; 1995409Skcpoon } 1996409Skcpoon if (af == IPV4_VERSION) { 1997409Skcpoon if (IPCL_RAW_MATCH(connp, protocol, hdr->ipha_dst)) 1998409Skcpoon break; 1999409Skcpoon } else { 2000409Skcpoon if (IPCL_RAW_MATCH_V6(connp, protocol, 2001409Skcpoon ((ip6_t *)hdr)->ip6_dst)) { 2002409Skcpoon break; 2003409Skcpoon } 2004409Skcpoon } 20050Sstevel@tonic-gate } 2006409Skcpoon 2007409Skcpoon if (connp != NULL) 2008409Skcpoon goto found; 2009409Skcpoon 20100Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 20110Sstevel@tonic-gate return (NULL); 2012409Skcpoon 2013409Skcpoon found: 2014409Skcpoon ASSERT(connp != NULL); 2015409Skcpoon CONN_INC_REF(connp); 2016409Skcpoon mutex_exit(&connfp->connf_lock); 2017409Skcpoon return (connp); 20180Sstevel@tonic-gate } 20190Sstevel@tonic-gate 20200Sstevel@tonic-gate /* ARGSUSED */ 20210Sstevel@tonic-gate static int 20225240Snordmark tcp_conn_constructor(void *buf, void *cdrarg, int kmflags) 20230Sstevel@tonic-gate { 20240Sstevel@tonic-gate itc_t *itc = (itc_t *)buf; 20250Sstevel@tonic-gate conn_t *connp = &itc->itc_conn; 20265240Snordmark tcp_t *tcp = (tcp_t *)&itc[1]; 20275240Snordmark 20285240Snordmark bzero(connp, sizeof (conn_t)); 20295240Snordmark bzero(tcp, sizeof (tcp_t)); 20305240Snordmark 20315240Snordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 20325240Snordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 20330Sstevel@tonic-gate tcp->tcp_timercache = tcp_timermp_alloc(KM_NOSLEEP); 20340Sstevel@tonic-gate connp->conn_tcp = tcp; 20350Sstevel@tonic-gate 
connp->conn_flags = IPCL_TCPCONN; 20360Sstevel@tonic-gate connp->conn_ulp = IPPROTO_TCP; 20370Sstevel@tonic-gate tcp->tcp_connp = connp; 20380Sstevel@tonic-gate return (0); 20390Sstevel@tonic-gate } 20400Sstevel@tonic-gate 20410Sstevel@tonic-gate /* ARGSUSED */ 20420Sstevel@tonic-gate static void 20435240Snordmark tcp_conn_destructor(void *buf, void *cdrarg) 20445240Snordmark { 20455240Snordmark itc_t *itc = (itc_t *)buf; 20465240Snordmark conn_t *connp = &itc->itc_conn; 20475240Snordmark tcp_t *tcp = (tcp_t *)&itc[1]; 20485240Snordmark 20495240Snordmark ASSERT(connp->conn_flags & IPCL_TCPCONN); 20505240Snordmark ASSERT(tcp->tcp_connp == connp); 20515240Snordmark ASSERT(connp->conn_tcp == tcp); 20525240Snordmark tcp_timermp_free(tcp); 20535240Snordmark mutex_destroy(&connp->conn_lock); 20545240Snordmark cv_destroy(&connp->conn_cv); 20555240Snordmark } 20565240Snordmark 20575240Snordmark /* ARGSUSED */ 20585240Snordmark static int 20595240Snordmark ip_conn_constructor(void *buf, void *cdrarg, int kmflags) 20605240Snordmark { 20615240Snordmark itc_t *itc = (itc_t *)buf; 20625240Snordmark conn_t *connp = &itc->itc_conn; 20635240Snordmark 20645240Snordmark bzero(connp, sizeof (conn_t)); 20655240Snordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 20665240Snordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 20675240Snordmark connp->conn_flags = IPCL_IPCCONN; 20685240Snordmark 20695240Snordmark return (0); 20705240Snordmark } 20715240Snordmark 20725240Snordmark /* ARGSUSED */ 20735240Snordmark static void 20745240Snordmark ip_conn_destructor(void *buf, void *cdrarg) 20755240Snordmark { 20765240Snordmark itc_t *itc = (itc_t *)buf; 20775240Snordmark conn_t *connp = &itc->itc_conn; 20785240Snordmark 20795240Snordmark ASSERT(connp->conn_flags & IPCL_IPCCONN); 20805240Snordmark ASSERT(connp->conn_priv == NULL); 20815240Snordmark mutex_destroy(&connp->conn_lock); 20825240Snordmark cv_destroy(&connp->conn_cv); 20835240Snordmark } 20845240Snordmark 
20855240Snordmark /* ARGSUSED */ 20865240Snordmark static int 20875240Snordmark udp_conn_constructor(void *buf, void *cdrarg, int kmflags) 20885240Snordmark { 20895240Snordmark itc_t *itc = (itc_t *)buf; 20905240Snordmark conn_t *connp = &itc->itc_conn; 20915240Snordmark udp_t *udp = (udp_t *)&itc[1]; 20925240Snordmark 20935240Snordmark bzero(connp, sizeof (conn_t)); 20945240Snordmark bzero(udp, sizeof (udp_t)); 20955240Snordmark 20965240Snordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 20975240Snordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 20985240Snordmark connp->conn_udp = udp; 20995240Snordmark connp->conn_flags = IPCL_UDPCONN; 21005240Snordmark connp->conn_ulp = IPPROTO_UDP; 21015240Snordmark udp->udp_connp = connp; 21025240Snordmark return (0); 21035240Snordmark } 21045240Snordmark 21055240Snordmark /* ARGSUSED */ 21065240Snordmark static void 21075240Snordmark udp_conn_destructor(void *buf, void *cdrarg) 21085240Snordmark { 21095240Snordmark itc_t *itc = (itc_t *)buf; 21105240Snordmark conn_t *connp = &itc->itc_conn; 21115240Snordmark udp_t *udp = (udp_t *)&itc[1]; 21125240Snordmark 21135240Snordmark ASSERT(connp->conn_flags & IPCL_UDPCONN); 21145240Snordmark ASSERT(udp->udp_connp == connp); 21155240Snordmark ASSERT(connp->conn_udp == udp); 21165240Snordmark mutex_destroy(&connp->conn_lock); 21175240Snordmark cv_destroy(&connp->conn_cv); 21185240Snordmark } 21195240Snordmark 21205240Snordmark /* ARGSUSED */ 21215240Snordmark static int 21225240Snordmark rawip_conn_constructor(void *buf, void *cdrarg, int kmflags) 21230Sstevel@tonic-gate { 21245240Snordmark itc_t *itc = (itc_t *)buf; 21255240Snordmark conn_t *connp = &itc->itc_conn; 21265240Snordmark icmp_t *icmp = (icmp_t *)&itc[1]; 21275240Snordmark 21285240Snordmark bzero(connp, sizeof (conn_t)); 21295240Snordmark bzero(icmp, sizeof (icmp_t)); 21305240Snordmark 21315240Snordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 21325240Snordmark 
cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 21335240Snordmark connp->conn_icmp = icmp; 21345240Snordmark connp->conn_flags = IPCL_RAWIPCONN; 21355240Snordmark connp->conn_ulp = IPPROTO_ICMP; 21365240Snordmark icmp->icmp_connp = connp; 21375240Snordmark return (0); 21385240Snordmark } 21395240Snordmark 21405240Snordmark /* ARGSUSED */ 21415240Snordmark static void 21425240Snordmark rawip_conn_destructor(void *buf, void *cdrarg) 21435240Snordmark { 21445240Snordmark itc_t *itc = (itc_t *)buf; 21455240Snordmark conn_t *connp = &itc->itc_conn; 21465240Snordmark icmp_t *icmp = (icmp_t *)&itc[1]; 21475240Snordmark 21485240Snordmark ASSERT(connp->conn_flags & IPCL_RAWIPCONN); 21495240Snordmark ASSERT(icmp->icmp_connp == connp); 21505240Snordmark ASSERT(connp->conn_icmp == icmp); 21515240Snordmark mutex_destroy(&connp->conn_lock); 21525240Snordmark cv_destroy(&connp->conn_cv); 21535240Snordmark } 21545240Snordmark 21555240Snordmark /* ARGSUSED */ 21565240Snordmark static int 21575240Snordmark rts_conn_constructor(void *buf, void *cdrarg, int kmflags) 21585240Snordmark { 21595240Snordmark itc_t *itc = (itc_t *)buf; 21605240Snordmark conn_t *connp = &itc->itc_conn; 21615240Snordmark rts_t *rts = (rts_t *)&itc[1]; 21625240Snordmark 21635240Snordmark bzero(connp, sizeof (conn_t)); 21645240Snordmark bzero(rts, sizeof (rts_t)); 21655240Snordmark 21665240Snordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 21675240Snordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 21685240Snordmark connp->conn_rts = rts; 21695240Snordmark connp->conn_flags = IPCL_RTSCONN; 21705240Snordmark rts->rts_connp = connp; 21715240Snordmark return (0); 21725240Snordmark } 21735240Snordmark 21745240Snordmark /* ARGSUSED */ 21755240Snordmark static void 21765240Snordmark rts_conn_destructor(void *buf, void *cdrarg) 21775240Snordmark { 21785240Snordmark itc_t *itc = (itc_t *)buf; 21795240Snordmark conn_t *connp = &itc->itc_conn; 21805240Snordmark rts_t *rts = (rts_t *)&itc[1]; 
21815240Snordmark 21825240Snordmark ASSERT(connp->conn_flags & IPCL_RTSCONN); 21835240Snordmark ASSERT(rts->rts_connp == connp); 21845240Snordmark ASSERT(connp->conn_rts == rts); 21855240Snordmark mutex_destroy(&connp->conn_lock); 21865240Snordmark cv_destroy(&connp->conn_cv); 21875240Snordmark } 21885240Snordmark 21895240Snordmark /* 21905240Snordmark * Called as part of ipcl_conn_destroy to assert and clear any pointers 21915240Snordmark * in the conn_t. 21925277Snordmark * 21935277Snordmark * Below we list all the pointers in the conn_t as a documentation aid. 21945277Snordmark * The ones that we can not ASSERT to be NULL are #ifdef'ed out. 21955277Snordmark * If you add any pointers to the conn_t please add an ASSERT here 21965277Snordmark * and #ifdef it out if it can't be actually asserted to be NULL. 21975277Snordmark * In any case, we bzero most of the conn_t at the end of the function. 21985240Snordmark */ 21995240Snordmark void 22005240Snordmark ipcl_conn_cleanup(conn_t *connp) 22015240Snordmark { 22025240Snordmark ASSERT(connp->conn_ire_cache == NULL); 22035240Snordmark ASSERT(connp->conn_latch == NULL); 22045240Snordmark #ifdef notdef 22055277Snordmark /* These are not cleared */ 22065240Snordmark ASSERT(connp->conn_rq == NULL); 22075240Snordmark ASSERT(connp->conn_wq == NULL); 22085240Snordmark #endif 22095240Snordmark ASSERT(connp->conn_cred == NULL); 22105240Snordmark ASSERT(connp->conn_g_fanout == NULL); 22115240Snordmark ASSERT(connp->conn_g_next == NULL); 22125240Snordmark ASSERT(connp->conn_g_prev == NULL); 22135240Snordmark ASSERT(connp->conn_policy == NULL); 22145240Snordmark ASSERT(connp->conn_fanout == NULL); 22155240Snordmark ASSERT(connp->conn_next == NULL); 22165240Snordmark ASSERT(connp->conn_prev == NULL); 22175240Snordmark #ifdef notdef 22185240Snordmark /* 22195240Snordmark * The ill and ipif pointers are not cleared before the conn_t 22205240Snordmark * goes away since they do not hold a reference on the ill/ipif. 
22215240Snordmark * We should replace these pointers with ifindex/ipaddr_t to 22225240Snordmark * make the code less complex. 22235240Snordmark */ 22245240Snordmark ASSERT(connp->conn_xmit_if_ill == NULL); 22255240Snordmark ASSERT(connp->conn_nofailover_ill == NULL); 22265240Snordmark ASSERT(connp->conn_outgoing_ill == NULL); 22275240Snordmark ASSERT(connp->conn_incoming_ill == NULL); 22285240Snordmark ASSERT(connp->conn_outgoing_pill == NULL); 22295240Snordmark ASSERT(connp->conn_multicast_ipif == NULL); 22305240Snordmark ASSERT(connp->conn_multicast_ill == NULL); 22315240Snordmark #endif 22325240Snordmark ASSERT(connp->conn_oper_pending_ill == NULL); 22335240Snordmark ASSERT(connp->conn_ilg == NULL); 22345240Snordmark ASSERT(connp->conn_drain_next == NULL); 22355240Snordmark ASSERT(connp->conn_drain_prev == NULL); 22365277Snordmark #ifdef notdef 22375277Snordmark /* conn_idl is not cleared when removed from idl list */ 22385240Snordmark ASSERT(connp->conn_idl == NULL); 22395277Snordmark #endif 22405240Snordmark ASSERT(connp->conn_ipsec_opt_mp == NULL); 22415240Snordmark ASSERT(connp->conn_peercred == NULL); 22425240Snordmark ASSERT(connp->conn_netstack == NULL); 22435240Snordmark 22445240Snordmark /* Clear out the conn_t fields that are not preserved */ 22455240Snordmark bzero(&connp->conn_start_clr, 22465240Snordmark sizeof (conn_t) - 22475240Snordmark ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp)); 22485240Snordmark 22490Sstevel@tonic-gate } 22500Sstevel@tonic-gate 22510Sstevel@tonic-gate /* 22520Sstevel@tonic-gate * All conns are inserted in a global multi-list for the benefit of 22530Sstevel@tonic-gate * walkers. The walk is guaranteed to walk all open conns at the time 22540Sstevel@tonic-gate * of the start of the walk exactly once. This property is needed to 22550Sstevel@tonic-gate * achieve some cleanups during unplumb of interfaces. This is achieved 22560Sstevel@tonic-gate * as follows. 
22570Sstevel@tonic-gate * 22580Sstevel@tonic-gate * ipcl_conn_create and ipcl_conn_destroy are the only functions that 22590Sstevel@tonic-gate * call the insert and delete functions below at creation and deletion 22600Sstevel@tonic-gate * time respectively. The conn never moves or changes its position in this 22610Sstevel@tonic-gate * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt 22620Sstevel@tonic-gate * won't increase due to walkers, once the conn deletion has started. Note 22630Sstevel@tonic-gate * that we can't remove the conn from the global list and then wait for 22640Sstevel@tonic-gate * the refcnt to drop to zero, since walkers would then see a truncated 22650Sstevel@tonic-gate * list. CONN_INCIPIENT ensures that walkers don't start looking at 22660Sstevel@tonic-gate * conns until ip_open is ready to make them globally visible. 22670Sstevel@tonic-gate * The global round robin multi-list locks are held only to get the 22680Sstevel@tonic-gate * next member/insertion/deletion and contention should be negligible 22690Sstevel@tonic-gate * if the multi-list is much greater than the number of cpus. 22700Sstevel@tonic-gate */ 22710Sstevel@tonic-gate void 22720Sstevel@tonic-gate ipcl_globalhash_insert(conn_t *connp) 22730Sstevel@tonic-gate { 22740Sstevel@tonic-gate int index; 22753448Sdh155122 struct connf_s *connfp; 22763448Sdh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 22770Sstevel@tonic-gate 22780Sstevel@tonic-gate /* 22790Sstevel@tonic-gate * No need for atomic here. Approximate even distribution 22800Sstevel@tonic-gate * in the global lists is sufficient. 
22810Sstevel@tonic-gate */ 22823448Sdh155122 ipst->ips_conn_g_index++; 22833448Sdh155122 index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1); 22840Sstevel@tonic-gate 22850Sstevel@tonic-gate connp->conn_g_prev = NULL; 22860Sstevel@tonic-gate /* 22870Sstevel@tonic-gate * Mark as INCIPIENT, so that walkers will ignore this 22880Sstevel@tonic-gate * for now, till ip_open is ready to make it visible globally. 22890Sstevel@tonic-gate */ 22900Sstevel@tonic-gate connp->conn_state_flags |= CONN_INCIPIENT; 22910Sstevel@tonic-gate 22923448Sdh155122 connfp = &ipst->ips_ipcl_globalhash_fanout[index]; 22930Sstevel@tonic-gate /* Insert at the head of the list */ 22943448Sdh155122 mutex_enter(&connfp->connf_lock); 22953448Sdh155122 connp->conn_g_next = connfp->connf_head; 22960Sstevel@tonic-gate if (connp->conn_g_next != NULL) 22970Sstevel@tonic-gate connp->conn_g_next->conn_g_prev = connp; 22983448Sdh155122 connfp->connf_head = connp; 22990Sstevel@tonic-gate 23000Sstevel@tonic-gate /* The fanout bucket this conn points to */ 23013448Sdh155122 connp->conn_g_fanout = connfp; 23020Sstevel@tonic-gate 23033448Sdh155122 mutex_exit(&connfp->connf_lock); 23040Sstevel@tonic-gate } 23050Sstevel@tonic-gate 23060Sstevel@tonic-gate void 23070Sstevel@tonic-gate ipcl_globalhash_remove(conn_t *connp) 23080Sstevel@tonic-gate { 23093448Sdh155122 struct connf_s *connfp; 23103448Sdh155122 23110Sstevel@tonic-gate /* 23120Sstevel@tonic-gate * We were never inserted in the global multi list. 23130Sstevel@tonic-gate * IPCL_NONE variety is never inserted in the global multilist 23140Sstevel@tonic-gate * since it is presumed to not need any cleanup and is transient. 
23150Sstevel@tonic-gate */ 23160Sstevel@tonic-gate if (connp->conn_g_fanout == NULL) 23170Sstevel@tonic-gate return; 23180Sstevel@tonic-gate 23193448Sdh155122 connfp = connp->conn_g_fanout; 23203448Sdh155122 mutex_enter(&connfp->connf_lock); 23210Sstevel@tonic-gate if (connp->conn_g_prev != NULL) 23220Sstevel@tonic-gate connp->conn_g_prev->conn_g_next = connp->conn_g_next; 23230Sstevel@tonic-gate else 23243448Sdh155122 connfp->connf_head = connp->conn_g_next; 23250Sstevel@tonic-gate if (connp->conn_g_next != NULL) 23260Sstevel@tonic-gate connp->conn_g_next->conn_g_prev = connp->conn_g_prev; 23273448Sdh155122 mutex_exit(&connfp->connf_lock); 23280Sstevel@tonic-gate 23290Sstevel@tonic-gate /* Better to stumble on a null pointer than to corrupt memory */ 23300Sstevel@tonic-gate connp->conn_g_next = NULL; 23310Sstevel@tonic-gate connp->conn_g_prev = NULL; 23325240Snordmark connp->conn_g_fanout = NULL; 23330Sstevel@tonic-gate } 23340Sstevel@tonic-gate 23350Sstevel@tonic-gate /* 23360Sstevel@tonic-gate * Walk the list of all conn_t's in the system, calling the function provided 23370Sstevel@tonic-gate * with the specified argument for each. 23380Sstevel@tonic-gate * Applies to both IPv4 and IPv6. 23390Sstevel@tonic-gate * 23400Sstevel@tonic-gate * IPCs may hold pointers to ipif/ill. To guard against stale pointers 23410Sstevel@tonic-gate * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is 23420Sstevel@tonic-gate * unplumbed or removed. New conn_t's that are created while we are walking 23430Sstevel@tonic-gate * may be missed by this walk, because they are not necessarily inserted 23440Sstevel@tonic-gate * at the tail of the list. They are new conn_t's and thus don't have any 23450Sstevel@tonic-gate * stale pointers. The CONN_CLOSING flag ensures that no new reference 23460Sstevel@tonic-gate * is created to the struct that is going away. 
23470Sstevel@tonic-gate */ 23480Sstevel@tonic-gate void 23493448Sdh155122 ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst) 23500Sstevel@tonic-gate { 23510Sstevel@tonic-gate int i; 23520Sstevel@tonic-gate conn_t *connp; 23530Sstevel@tonic-gate conn_t *prev_connp; 23540Sstevel@tonic-gate 23550Sstevel@tonic-gate for (i = 0; i < CONN_G_HASH_SIZE; i++) { 23563448Sdh155122 mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 23570Sstevel@tonic-gate prev_connp = NULL; 23583448Sdh155122 connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head; 23590Sstevel@tonic-gate while (connp != NULL) { 23600Sstevel@tonic-gate mutex_enter(&connp->conn_lock); 23610Sstevel@tonic-gate if (connp->conn_state_flags & 23620Sstevel@tonic-gate (CONN_CONDEMNED | CONN_INCIPIENT)) { 23630Sstevel@tonic-gate mutex_exit(&connp->conn_lock); 23640Sstevel@tonic-gate connp = connp->conn_g_next; 23650Sstevel@tonic-gate continue; 23660Sstevel@tonic-gate } 23670Sstevel@tonic-gate CONN_INC_REF_LOCKED(connp); 23680Sstevel@tonic-gate mutex_exit(&connp->conn_lock); 23693448Sdh155122 mutex_exit( 23703448Sdh155122 &ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 23710Sstevel@tonic-gate (*func)(connp, arg); 23720Sstevel@tonic-gate if (prev_connp != NULL) 23730Sstevel@tonic-gate CONN_DEC_REF(prev_connp); 23743448Sdh155122 mutex_enter( 23753448Sdh155122 &ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 23760Sstevel@tonic-gate prev_connp = connp; 23770Sstevel@tonic-gate connp = connp->conn_g_next; 23780Sstevel@tonic-gate } 23793448Sdh155122 mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 23800Sstevel@tonic-gate if (prev_connp != NULL) 23810Sstevel@tonic-gate CONN_DEC_REF(prev_connp); 23820Sstevel@tonic-gate } 23830Sstevel@tonic-gate } 23840Sstevel@tonic-gate 23850Sstevel@tonic-gate /* 23860Sstevel@tonic-gate * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on 23870Sstevel@tonic-gate * the {src, dst, lport, fport} quadruplet. 
Returns with conn reference 23880Sstevel@tonic-gate * held; caller must call CONN_DEC_REF. Only checks for connected entries 23892323Sethindra * (peer tcp in ESTABLISHED state). 23900Sstevel@tonic-gate */ 23910Sstevel@tonic-gate conn_t * 23923448Sdh155122 ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcph_t *tcph, 23933448Sdh155122 ip_stack_t *ipst) 23940Sstevel@tonic-gate { 23950Sstevel@tonic-gate uint32_t ports; 23960Sstevel@tonic-gate uint16_t *pports = (uint16_t *)&ports; 23970Sstevel@tonic-gate connf_t *connfp; 23980Sstevel@tonic-gate conn_t *tconnp; 23990Sstevel@tonic-gate boolean_t zone_chk; 24000Sstevel@tonic-gate 24010Sstevel@tonic-gate /* 24020Sstevel@tonic-gate * If either the source of destination address is loopback, then 24030Sstevel@tonic-gate * both endpoints must be in the same Zone. Otherwise, both of 24040Sstevel@tonic-gate * the addresses are system-wide unique (tcp is in ESTABLISHED 24050Sstevel@tonic-gate * state) and the endpoints may reside in different Zones. 
24060Sstevel@tonic-gate */ 24070Sstevel@tonic-gate zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) || 24080Sstevel@tonic-gate ipha->ipha_dst == htonl(INADDR_LOOPBACK)); 24090Sstevel@tonic-gate 24100Sstevel@tonic-gate bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 24110Sstevel@tonic-gate bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 24120Sstevel@tonic-gate 24133448Sdh155122 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, 24143448Sdh155122 ports, ipst)]; 24150Sstevel@tonic-gate 24160Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 24170Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 24180Sstevel@tonic-gate tconnp = tconnp->conn_next) { 24190Sstevel@tonic-gate 24200Sstevel@tonic-gate if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 24210Sstevel@tonic-gate ipha->ipha_dst, ipha->ipha_src, ports) && 24222323Sethindra tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED && 24230Sstevel@tonic-gate (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 24240Sstevel@tonic-gate 24250Sstevel@tonic-gate ASSERT(tconnp != connp); 24260Sstevel@tonic-gate CONN_INC_REF(tconnp); 24270Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 24280Sstevel@tonic-gate return (tconnp); 24290Sstevel@tonic-gate } 24300Sstevel@tonic-gate } 24310Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 24320Sstevel@tonic-gate return (NULL); 24330Sstevel@tonic-gate } 24340Sstevel@tonic-gate 24350Sstevel@tonic-gate /* 24360Sstevel@tonic-gate * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on 24370Sstevel@tonic-gate * the {src, dst, lport, fport} quadruplet. Returns with conn reference 24380Sstevel@tonic-gate * held; caller must call CONN_DEC_REF. Only checks for connected entries 24392323Sethindra * (peer tcp in ESTABLISHED state). 
24400Sstevel@tonic-gate */ 24410Sstevel@tonic-gate conn_t * 24423448Sdh155122 ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcph_t *tcph, 24433448Sdh155122 ip_stack_t *ipst) 24440Sstevel@tonic-gate { 24450Sstevel@tonic-gate uint32_t ports; 24460Sstevel@tonic-gate uint16_t *pports = (uint16_t *)&ports; 24470Sstevel@tonic-gate connf_t *connfp; 24480Sstevel@tonic-gate conn_t *tconnp; 24490Sstevel@tonic-gate boolean_t zone_chk; 24500Sstevel@tonic-gate 24510Sstevel@tonic-gate /* 24520Sstevel@tonic-gate * If either the source of destination address is loopback, then 24530Sstevel@tonic-gate * both endpoints must be in the same Zone. Otherwise, both of 24540Sstevel@tonic-gate * the addresses are system-wide unique (tcp is in ESTABLISHED 24550Sstevel@tonic-gate * state) and the endpoints may reside in different Zones. We 24560Sstevel@tonic-gate * don't do Zone check for link local address(es) because the 24570Sstevel@tonic-gate * current Zone implementation treats each link local address as 24580Sstevel@tonic-gate * being unique per system node, i.e. they belong to global Zone. 
24590Sstevel@tonic-gate */ 24600Sstevel@tonic-gate zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) || 24610Sstevel@tonic-gate IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)); 24620Sstevel@tonic-gate 24630Sstevel@tonic-gate bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 24640Sstevel@tonic-gate bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 24650Sstevel@tonic-gate 24663448Sdh155122 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, 24673448Sdh155122 ports, ipst)]; 24680Sstevel@tonic-gate 24690Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 24700Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 24710Sstevel@tonic-gate tconnp = tconnp->conn_next) { 24720Sstevel@tonic-gate 24730Sstevel@tonic-gate /* We skip tcp_bound_if check here as this is loopback tcp */ 24740Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 24750Sstevel@tonic-gate ip6h->ip6_dst, ip6h->ip6_src, ports) && 24762323Sethindra tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED && 24770Sstevel@tonic-gate (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 24780Sstevel@tonic-gate 24790Sstevel@tonic-gate ASSERT(tconnp != connp); 24800Sstevel@tonic-gate CONN_INC_REF(tconnp); 24810Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 24820Sstevel@tonic-gate return (tconnp); 24830Sstevel@tonic-gate } 24840Sstevel@tonic-gate } 24850Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 24860Sstevel@tonic-gate return (NULL); 24870Sstevel@tonic-gate } 24880Sstevel@tonic-gate 24890Sstevel@tonic-gate /* 24900Sstevel@tonic-gate * Find an exact {src, dst, lport, fport} match for a bounced datagram. 24910Sstevel@tonic-gate * Returns with conn reference held. Caller must call CONN_DEC_REF. 24920Sstevel@tonic-gate * Only checks for connected entries i.e. no INADDR_ANY checks. 
24930Sstevel@tonic-gate */ 24940Sstevel@tonic-gate conn_t * 24953448Sdh155122 ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcph_t *tcph, int min_state, 24963448Sdh155122 ip_stack_t *ipst) 24970Sstevel@tonic-gate { 24980Sstevel@tonic-gate uint32_t ports; 24990Sstevel@tonic-gate uint16_t *pports; 25000Sstevel@tonic-gate connf_t *connfp; 25010Sstevel@tonic-gate conn_t *tconnp; 25020Sstevel@tonic-gate 25030Sstevel@tonic-gate pports = (uint16_t *)&ports; 25040Sstevel@tonic-gate bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 25050Sstevel@tonic-gate bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 25060Sstevel@tonic-gate 25073448Sdh155122 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, 25084691Skcpoon ports, ipst)]; 25090Sstevel@tonic-gate 25100Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 25110Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 25120Sstevel@tonic-gate tconnp = tconnp->conn_next) { 25130Sstevel@tonic-gate 25140Sstevel@tonic-gate if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 25150Sstevel@tonic-gate ipha->ipha_dst, ipha->ipha_src, ports) && 25160Sstevel@tonic-gate tconnp->conn_tcp->tcp_state >= min_state) { 25170Sstevel@tonic-gate 25180Sstevel@tonic-gate CONN_INC_REF(tconnp); 25190Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 25200Sstevel@tonic-gate return (tconnp); 25210Sstevel@tonic-gate } 25220Sstevel@tonic-gate } 25230Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 25240Sstevel@tonic-gate return (NULL); 25250Sstevel@tonic-gate } 25260Sstevel@tonic-gate 25270Sstevel@tonic-gate /* 25280Sstevel@tonic-gate * Find an exact {src, dst, lport, fport} match for a bounced datagram. 25290Sstevel@tonic-gate * Returns with conn reference held. Caller must call CONN_DEC_REF. 25300Sstevel@tonic-gate * Only checks for connected entries i.e. no INADDR_ANY checks. 25310Sstevel@tonic-gate * Match on ifindex in addition to addresses. 
25320Sstevel@tonic-gate */ 25330Sstevel@tonic-gate conn_t * 25340Sstevel@tonic-gate ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state, 25353448Sdh155122 uint_t ifindex, ip_stack_t *ipst) 25360Sstevel@tonic-gate { 25370Sstevel@tonic-gate tcp_t *tcp; 25380Sstevel@tonic-gate uint32_t ports; 25390Sstevel@tonic-gate uint16_t *pports; 25400Sstevel@tonic-gate connf_t *connfp; 25410Sstevel@tonic-gate conn_t *tconnp; 25420Sstevel@tonic-gate 25430Sstevel@tonic-gate pports = (uint16_t *)&ports; 25440Sstevel@tonic-gate pports[0] = tcpha->tha_fport; 25450Sstevel@tonic-gate pports[1] = tcpha->tha_lport; 25460Sstevel@tonic-gate 25473448Sdh155122 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, 25484691Skcpoon ports, ipst)]; 25490Sstevel@tonic-gate 25500Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 25510Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 25520Sstevel@tonic-gate tconnp = tconnp->conn_next) { 25530Sstevel@tonic-gate 25540Sstevel@tonic-gate tcp = tconnp->conn_tcp; 25550Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 25560Sstevel@tonic-gate ip6h->ip6_dst, ip6h->ip6_src, ports) && 25570Sstevel@tonic-gate tcp->tcp_state >= min_state && 25580Sstevel@tonic-gate (tcp->tcp_bound_if == 0 || 25590Sstevel@tonic-gate tcp->tcp_bound_if == ifindex)) { 25600Sstevel@tonic-gate 25610Sstevel@tonic-gate CONN_INC_REF(tconnp); 25620Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 25630Sstevel@tonic-gate return (tconnp); 25640Sstevel@tonic-gate } 25650Sstevel@tonic-gate } 25660Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 25670Sstevel@tonic-gate return (NULL); 25680Sstevel@tonic-gate } 25690Sstevel@tonic-gate 25700Sstevel@tonic-gate /* 25711676Sjpk * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate 25721676Sjpk * a listener when changing state. 
25730Sstevel@tonic-gate */ 25740Sstevel@tonic-gate conn_t * 25753448Sdh155122 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid, 25763448Sdh155122 ip_stack_t *ipst) 25770Sstevel@tonic-gate { 25780Sstevel@tonic-gate connf_t *bind_connfp; 25790Sstevel@tonic-gate conn_t *connp; 25800Sstevel@tonic-gate tcp_t *tcp; 25810Sstevel@tonic-gate 25820Sstevel@tonic-gate /* 25830Sstevel@tonic-gate * Avoid false matches for packets sent to an IP destination of 25840Sstevel@tonic-gate * all zeros. 25850Sstevel@tonic-gate */ 25860Sstevel@tonic-gate if (laddr == 0) 25870Sstevel@tonic-gate return (NULL); 25880Sstevel@tonic-gate 25891676Sjpk ASSERT(zoneid != ALL_ZONES); 25901676Sjpk 25913448Sdh155122 bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 25920Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 25930Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 25940Sstevel@tonic-gate connp = connp->conn_next) { 25950Sstevel@tonic-gate tcp = connp->conn_tcp; 25960Sstevel@tonic-gate if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) && 25972263Ssommerfe IPCL_ZONE_MATCH(connp, zoneid) && 25980Sstevel@tonic-gate (tcp->tcp_listener == NULL)) { 25990Sstevel@tonic-gate CONN_INC_REF(connp); 26000Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 26010Sstevel@tonic-gate return (connp); 26020Sstevel@tonic-gate } 26030Sstevel@tonic-gate } 26040Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 26050Sstevel@tonic-gate return (NULL); 26060Sstevel@tonic-gate } 26070Sstevel@tonic-gate 26081676Sjpk /* 26091676Sjpk * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate 26101676Sjpk * a listener when changing state. 
26111676Sjpk */ 26120Sstevel@tonic-gate conn_t * 26130Sstevel@tonic-gate ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex, 26143448Sdh155122 zoneid_t zoneid, ip_stack_t *ipst) 26150Sstevel@tonic-gate { 26160Sstevel@tonic-gate connf_t *bind_connfp; 26170Sstevel@tonic-gate conn_t *connp = NULL; 26180Sstevel@tonic-gate tcp_t *tcp; 26190Sstevel@tonic-gate 26200Sstevel@tonic-gate /* 26210Sstevel@tonic-gate * Avoid false matches for packets sent to an IP destination of 26220Sstevel@tonic-gate * all zeros. 26230Sstevel@tonic-gate */ 26240Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED(laddr)) 26250Sstevel@tonic-gate return (NULL); 26260Sstevel@tonic-gate 26271676Sjpk ASSERT(zoneid != ALL_ZONES); 26280Sstevel@tonic-gate 26293448Sdh155122 bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 26300Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 26310Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 26320Sstevel@tonic-gate connp = connp->conn_next) { 26330Sstevel@tonic-gate tcp = connp->conn_tcp; 26340Sstevel@tonic-gate if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) && 26352263Ssommerfe IPCL_ZONE_MATCH(connp, zoneid) && 26360Sstevel@tonic-gate (tcp->tcp_bound_if == 0 || 26370Sstevel@tonic-gate tcp->tcp_bound_if == ifindex) && 26380Sstevel@tonic-gate tcp->tcp_listener == NULL) { 26390Sstevel@tonic-gate CONN_INC_REF(connp); 26400Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 26410Sstevel@tonic-gate return (connp); 26420Sstevel@tonic-gate } 26430Sstevel@tonic-gate } 26440Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 26450Sstevel@tonic-gate return (NULL); 26460Sstevel@tonic-gate } 26470Sstevel@tonic-gate 2648741Smasputra /* 2649741Smasputra * ipcl_get_next_conn 2650741Smasputra * get the next entry in the conn global list 2651741Smasputra * and put a reference on the next_conn. 2652741Smasputra * decrement the reference on the current conn. 
2653741Smasputra * 2654741Smasputra * This is an iterator based walker function that also provides for 2655741Smasputra * some selection by the caller. It walks through the conn_hash bucket 2656741Smasputra * searching for the next valid connp in the list, and selects connections 2657741Smasputra * that are neither closed nor condemned. It also REFHOLDS the conn 2658741Smasputra * thus ensuring that the conn exists when the caller uses the conn. 2659741Smasputra */ 2660741Smasputra conn_t * 2661741Smasputra ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags) 2662741Smasputra { 2663741Smasputra conn_t *next_connp; 2664741Smasputra 2665741Smasputra if (connfp == NULL) 2666741Smasputra return (NULL); 2667741Smasputra 2668741Smasputra mutex_enter(&connfp->connf_lock); 2669741Smasputra 2670741Smasputra next_connp = (connp == NULL) ? 2671741Smasputra connfp->connf_head : connp->conn_g_next; 2672741Smasputra 2673741Smasputra while (next_connp != NULL) { 2674741Smasputra mutex_enter(&next_connp->conn_lock); 2675741Smasputra if (!(next_connp->conn_flags & conn_flags) || 2676741Smasputra (next_connp->conn_state_flags & 2677741Smasputra (CONN_CONDEMNED | CONN_INCIPIENT))) { 2678741Smasputra /* 2679741Smasputra * This conn has been condemned or 2680741Smasputra * is closing, or the flags don't match 2681741Smasputra */ 2682741Smasputra mutex_exit(&next_connp->conn_lock); 2683741Smasputra next_connp = next_connp->conn_g_next; 2684741Smasputra continue; 2685741Smasputra } 2686741Smasputra CONN_INC_REF_LOCKED(next_connp); 2687741Smasputra mutex_exit(&next_connp->conn_lock); 2688741Smasputra break; 2689741Smasputra } 2690741Smasputra 2691741Smasputra mutex_exit(&connfp->connf_lock); 2692741Smasputra 2693741Smasputra if (connp != NULL) 2694741Smasputra CONN_DEC_REF(connp); 2695741Smasputra 2696741Smasputra return (next_connp); 2697741Smasputra } 2698741Smasputra 26990Sstevel@tonic-gate #ifdef CONN_DEBUG 27000Sstevel@tonic-gate /* 27010Sstevel@tonic-gate * Trace 
of the last NBUF refhold/refrele 27020Sstevel@tonic-gate */ 27030Sstevel@tonic-gate int 27040Sstevel@tonic-gate conn_trace_ref(conn_t *connp) 27050Sstevel@tonic-gate { 27060Sstevel@tonic-gate int last; 27070Sstevel@tonic-gate conn_trace_t *ctb; 27080Sstevel@tonic-gate 27090Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock)); 27100Sstevel@tonic-gate last = connp->conn_trace_last; 27110Sstevel@tonic-gate last++; 27120Sstevel@tonic-gate if (last == CONN_TRACE_MAX) 27130Sstevel@tonic-gate last = 0; 27140Sstevel@tonic-gate 27150Sstevel@tonic-gate ctb = &connp->conn_trace_buf[last]; 27165023Scarlsonj ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH); 27170Sstevel@tonic-gate connp->conn_trace_last = last; 27180Sstevel@tonic-gate return (1); 27190Sstevel@tonic-gate } 27200Sstevel@tonic-gate 27210Sstevel@tonic-gate int 27220Sstevel@tonic-gate conn_untrace_ref(conn_t *connp) 27230Sstevel@tonic-gate { 27240Sstevel@tonic-gate int last; 27250Sstevel@tonic-gate conn_trace_t *ctb; 27260Sstevel@tonic-gate 27270Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock)); 27280Sstevel@tonic-gate last = connp->conn_trace_last; 27290Sstevel@tonic-gate last++; 27300Sstevel@tonic-gate if (last == CONN_TRACE_MAX) 27310Sstevel@tonic-gate last = 0; 27320Sstevel@tonic-gate 27330Sstevel@tonic-gate ctb = &connp->conn_trace_buf[last]; 27345023Scarlsonj ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH); 27350Sstevel@tonic-gate connp->conn_trace_last = last; 27360Sstevel@tonic-gate return (1); 27370Sstevel@tonic-gate } 27380Sstevel@tonic-gate #endif 2739