10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 51503Sericheng * Common Development and Distribution License (the "License"). 61503Sericheng * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 221503Sericheng * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate 260Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 270Sstevel@tonic-gate 28*1676Sjpk const char ipclassifier_version[] = "@(#)ipclassifier.c %I% %E% SMI"; 290Sstevel@tonic-gate 300Sstevel@tonic-gate /* 310Sstevel@tonic-gate * IP PACKET CLASSIFIER 320Sstevel@tonic-gate * 330Sstevel@tonic-gate * The IP packet classifier provides mapping between IP packets and persistent 340Sstevel@tonic-gate * connection state for connection-oriented protocols. It also provides 350Sstevel@tonic-gate * interface for managing connection states. 
 *
 * The connection state is kept in conn_t data structure and contains, among
 * other things:
 *
 *	o  local/remote address and ports
 *	o  Transport protocol
 *	o  squeue for the connection (for TCP only)
 *	o  reference counter
 *	o  Connection state
 *	o  hash table linkage
 *	o  interface/ire information
 *	o  credentials
 *	o  ipsec policy
 *	o  send and receive functions.
 *	o  mutex lock.
 *
 * Connections use a reference counting scheme. They are freed when the
 * reference counter drops to zero. A reference is incremented when connection
 * is placed in a list or table, when incoming packet for the connection arrives
 * and when connection is processed via squeue (squeue processing may be
 * asynchronous and the reference protects the connection from being destroyed
 * before its processing is finished).
 *
 * send and receive functions are currently used for TCP only. The send function
 * determines the IP entry point for the packet once it leaves TCP to be sent to
 * the destination address. The receive function is used by IP when the packet
 * should be passed for TCP processing. When a new connection is created these
 * are set to ip_output() and tcp_input() respectively. During the lifetime of
 * the connection the send and receive functions may change depending on the
 * changes in the connection state.
 * For example, once the connection is bound to
 * an address, the receive function for this connection is set to
 * tcp_conn_request(). This allows incoming SYNs to go directly into the
 * listener SYN processing function without going to tcp_input() first.
 *
 * Classifier uses several hash tables:
 *
 *	ipcl_conn_fanout:	contains all TCP connections in CONNECTED state
 *	ipcl_bind_fanout:	contains all connections in BOUND state
 *	ipcl_proto_fanout:	IPv4 protocol fanout
 *	ipcl_proto_fanout_v6:	IPv6 protocol fanout
 *	ipcl_udp_fanout:	contains all UDP connections
 *	ipcl_globalhash_fanout:	contains all connections
 *
 * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
 * which need to view all existing connections.
 *
 * All tables are protected by per-bucket locks. When both per-bucket lock and
 * connection lock need to be held, the per-bucket lock should be acquired
 * first, followed by the connection lock.
 *
 * All functions doing search in one of these tables increment a reference
 * counter on the connection found (if any). This reference should be dropped
 * when the caller has finished processing the connection.
 *
 *
 * INTERFACES:
 * ===========
 *
 * Connection Lookup:
 * ------------------
 *
 * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, zoneid)
 * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, zoneid)
 *
 * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
 * it can't find any associated connection. If the connection is found, its
 * reference counter is incremented.
 *
 *	mp:	mblock, containing packet header. The full header should fit
 *		into a single mblock. It should also contain at least full IP
 *		and TCP or UDP header.
 *
 *	protocol: Either IPPROTO_TCP or IPPROTO_UDP.
 *
 *	hdr_len: The size of IP header. It is used to find TCP or UDP header in
 *		 the packet.
 *
 *	zoneid: The zone in which the returned connection must be; the zoneid
 *		corresponding to the ire_zoneid on the IRE located for the
 *		packet's destination address.
 *
 *	For TCP connections, the lookup order is as follows:
 *		5-tuple {src, dst, protocol, local port, remote port}
 *			lookup in ipcl_conn_fanout table.
 *		3-tuple {dst, remote port, protocol} lookup in
 *			ipcl_bind_fanout table.
 *
 *	For UDP connections, a 5-tuple {src, dst, protocol, local port,
 *	remote port} lookup is done on ipcl_udp_fanout.
 *	Note that,
 *	these interfaces do not handle cases where a packet belongs
 *	to multiple UDP clients, which is handled in IP itself.
 *
 * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
 * determine which actual zone gets the segment. This is used only in a
 * labeled environment. The matching rules are:
 *
 *	- If it's not a multilevel port, then the label on the packet selects
 *	  the zone. Unlabeled packets are delivered to the global zone.
 *
 *	- If it's a multilevel port, then only the zone registered to receive
 *	  packets on that port matches.
 *
 * Also, in a labeled environment, packet labels need to be checked. For fully
 * bound TCP connections, we can assume that the packet label was checked
 * during connection establishment, and doesn't need to be checked on each
 * packet. For others, though, we need to check for strict equality or, for
 * multilevel ports, membership in the range or set. This part currently does
 * a tnrh lookup on each packet, but could be optimized to use cached results
 * if that were necessary. (SCTP doesn't come through here, but if it did,
 * we would apply the same rules as TCP.)
 *
 * An implication of the above is that fully-bound TCP sockets must always use
 * distinct 4-tuples; they can't be discriminated by label alone.
 *
 * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
 * as there's no connection set-up handshake and no shared state.
 *
 * Labels on looped-back packets within a single zone do not need to be
 * checked, as all processes in the same zone have the same label.
 *
 * Finally, for unlabeled packets received by a labeled system, special rules
 * apply. We consider only the MLP if there is one. Otherwise, we prefer a
 * socket in the zone whose label matches the default label of the sender, if
 * any. In any event, the receiving socket must have SO_MAC_EXEMPT set and the
 * receiver's label must dominate the sender's default label.
 *
 * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcph_t *, int);
 * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t);
 *
 *	Lookup routine to find an exact match for {src, dst, local port,
 *	remote port} for TCP connections in ipcl_conn_fanout. The address and
 *	ports are read from the IP and TCP header respectively.
 *
 * conn_t *ipcl_lookup_listener_v4(lport, laddr, protocol);
 * conn_t *ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex);
 *
 *	Lookup routine to find a listener with the tuple {lport, laddr,
 *	protocol} in the ipcl_bind_fanout table. For IPv6, an additional
 *	parameter interface index is also compared.
 *
 * void ipcl_walk(func, arg)
 *
 *	Apply 'func' to every connection available. The 'func' is called as
 *	(*func)(connp, arg). The walk is non-atomic so connections may be
 *	created and destroyed during the walk. The CONN_CONDEMNED and
 *	CONN_INCIPIENT flags ensure that connections which are newly created
 *	or being destroyed are not selected by the walker.
 *
 * Table Updates
 * -------------
 *
 * int ipcl_conn_insert(connp, protocol, src, dst, ports)
 * int ipcl_conn_insert_v6(connp, protocol, src, dst, ports, ifindex)
 *
 *	Insert 'connp' in the ipcl_conn_fanout.
 *	Arguments :
 *		connp		conn_t to be inserted
 *		protocol	connection protocol
 *		src		source address
 *		dst		destination address
 *		ports		local and remote port
 *		ifindex		interface index for IPv6 connections
 *
 *	Return value :
 *		0		if connp was inserted
 *		EADDRINUSE	if the connection with the same tuple
 *				already exists.
 *
 * int ipcl_bind_insert(connp, protocol, src, lport);
 * int ipcl_bind_insert_v6(connp, protocol, src, lport);
 *
 *	Insert 'connp' in ipcl_bind_fanout.
 *	Arguments :
 *		connp		conn_t to be inserted
 *		protocol	connection protocol
 *		src		source address connection wants
 *				to bind to
 *		lport		local port connection wants to
 *				bind to
 *
 *
 * void ipcl_hash_remove(connp);
 *
 *	Removes the 'connp' from the connection fanout table.
2200Sstevel@tonic-gate * 2210Sstevel@tonic-gate * Connection Creation/Destruction 2220Sstevel@tonic-gate * ------------------------------- 2230Sstevel@tonic-gate * 2240Sstevel@tonic-gate * conn_t *ipcl_conn_create(type, sleep) 2250Sstevel@tonic-gate * 2260Sstevel@tonic-gate * Creates a new conn based on the type flag, inserts it into 2270Sstevel@tonic-gate * globalhash table. 2280Sstevel@tonic-gate * 2290Sstevel@tonic-gate * type: This flag determines the type of conn_t which needs to be 2300Sstevel@tonic-gate * created. 2310Sstevel@tonic-gate * IPCL_TCPCONN indicates a TCP connection 2320Sstevel@tonic-gate * IPCL_IPCONN indicates all non-TCP connections. 2330Sstevel@tonic-gate * 2340Sstevel@tonic-gate * void ipcl_conn_destroy(connp) 2350Sstevel@tonic-gate * 2360Sstevel@tonic-gate * Destroys the connection state, removes it from the global 2370Sstevel@tonic-gate * connection hash table and frees its memory. 2380Sstevel@tonic-gate */ 2390Sstevel@tonic-gate 2400Sstevel@tonic-gate #include <sys/types.h> 2410Sstevel@tonic-gate #include <sys/stream.h> 2420Sstevel@tonic-gate #include <sys/stropts.h> 2430Sstevel@tonic-gate #include <sys/sysmacros.h> 2440Sstevel@tonic-gate #include <sys/strsubr.h> 2450Sstevel@tonic-gate #include <sys/strsun.h> 2460Sstevel@tonic-gate #define _SUN_TPI_VERSION 2 2470Sstevel@tonic-gate #include <sys/ddi.h> 2480Sstevel@tonic-gate #include <sys/cmn_err.h> 2490Sstevel@tonic-gate #include <sys/debug.h> 2500Sstevel@tonic-gate 2510Sstevel@tonic-gate #include <sys/systm.h> 2520Sstevel@tonic-gate #include <sys/param.h> 2530Sstevel@tonic-gate #include <sys/kmem.h> 2540Sstevel@tonic-gate #include <sys/isa_defs.h> 2550Sstevel@tonic-gate #include <inet/common.h> 2560Sstevel@tonic-gate #include <netinet/ip6.h> 2570Sstevel@tonic-gate #include <netinet/icmp6.h> 2580Sstevel@tonic-gate 2590Sstevel@tonic-gate #include <inet/ip.h> 2600Sstevel@tonic-gate #include <inet/ip6.h> 2610Sstevel@tonic-gate #include <inet/tcp.h> 2620Sstevel@tonic-gate #include 
<inet/ip_ndp.h> 263741Smasputra #include <inet/udp_impl.h> 2640Sstevel@tonic-gate #include <inet/sctp_ip.h> 2650Sstevel@tonic-gate 2660Sstevel@tonic-gate #include <sys/cpuvar.h> 2670Sstevel@tonic-gate 2680Sstevel@tonic-gate #include <inet/ipclassifier.h> 2690Sstevel@tonic-gate #include <inet/ipsec_impl.h> 2700Sstevel@tonic-gate 271*1676Sjpk #include <sys/tsol/tnet.h> 272*1676Sjpk 2730Sstevel@tonic-gate #ifdef DEBUG 2740Sstevel@tonic-gate #define IPCL_DEBUG 2750Sstevel@tonic-gate #else 2760Sstevel@tonic-gate #undef IPCL_DEBUG 2770Sstevel@tonic-gate #endif 2780Sstevel@tonic-gate 2790Sstevel@tonic-gate #ifdef IPCL_DEBUG 2800Sstevel@tonic-gate int ipcl_debug_level = 0; 2810Sstevel@tonic-gate #define IPCL_DEBUG_LVL(level, args) \ 2820Sstevel@tonic-gate if (ipcl_debug_level & level) { printf args; } 2830Sstevel@tonic-gate #else 2840Sstevel@tonic-gate #define IPCL_DEBUG_LVL(level, args) {; } 2850Sstevel@tonic-gate #endif 2860Sstevel@tonic-gate connf_t *ipcl_conn_fanout; 2870Sstevel@tonic-gate connf_t *ipcl_bind_fanout; 2880Sstevel@tonic-gate connf_t ipcl_proto_fanout[IPPROTO_MAX + 1]; 2890Sstevel@tonic-gate connf_t ipcl_proto_fanout_v6[IPPROTO_MAX + 1]; 2900Sstevel@tonic-gate connf_t *ipcl_udp_fanout; 2910Sstevel@tonic-gate 2920Sstevel@tonic-gate /* A separate hash list for raw socket. */ 2930Sstevel@tonic-gate connf_t *ipcl_raw_fanout; 2940Sstevel@tonic-gate 2950Sstevel@tonic-gate connf_t rts_clients; 2960Sstevel@tonic-gate 2970Sstevel@tonic-gate /* Old value for compatibility */ 2980Sstevel@tonic-gate uint_t tcp_conn_hash_size = 0; 2990Sstevel@tonic-gate 3000Sstevel@tonic-gate /* New value. Zero means choose automatically. 
*/ 3010Sstevel@tonic-gate uint_t ipcl_conn_hash_size = 0; 3020Sstevel@tonic-gate uint_t ipcl_conn_hash_memfactor = 8192; 3030Sstevel@tonic-gate uint_t ipcl_conn_hash_maxsize = 82500; 3040Sstevel@tonic-gate 3050Sstevel@tonic-gate uint_t ipcl_conn_fanout_size = 0; 3060Sstevel@tonic-gate 3070Sstevel@tonic-gate 3080Sstevel@tonic-gate /* bind/udp fanout table size */ 3090Sstevel@tonic-gate uint_t ipcl_bind_fanout_size = 512; 3101503Sericheng uint_t ipcl_udp_fanout_size = 16384; 3110Sstevel@tonic-gate 3120Sstevel@tonic-gate /* Raw socket fanout size. Must be a power of 2. */ 3130Sstevel@tonic-gate uint_t ipcl_raw_fanout_size = 256; 3140Sstevel@tonic-gate 3150Sstevel@tonic-gate /* 3160Sstevel@tonic-gate * Power of 2^N Primes useful for hashing for N of 0-28, 3170Sstevel@tonic-gate * these primes are the nearest prime <= 2^N - 2^(N-2). 3180Sstevel@tonic-gate */ 3190Sstevel@tonic-gate 3200Sstevel@tonic-gate #define P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067, \ 3210Sstevel@tonic-gate 6143, 12281, 24571, 49139, 98299, 196597, 393209, \ 3220Sstevel@tonic-gate 786431, 1572853, 3145721, 6291449, 12582893, 25165813, \ 3230Sstevel@tonic-gate 50331599, 100663291, 201326557, 0} 3240Sstevel@tonic-gate 3250Sstevel@tonic-gate /* 3260Sstevel@tonic-gate * wrapper structure to ensure that conn+tcpb are aligned 3270Sstevel@tonic-gate * on cache lines. 
3280Sstevel@tonic-gate */ 3290Sstevel@tonic-gate typedef struct itc_s { 3300Sstevel@tonic-gate union { 3310Sstevel@tonic-gate conn_t itcu_conn; 3320Sstevel@tonic-gate char itcu_filler[CACHE_ALIGN(conn_s)]; 3330Sstevel@tonic-gate } itc_u; 3340Sstevel@tonic-gate tcp_t itc_tcp; 3350Sstevel@tonic-gate } itc_t; 3360Sstevel@tonic-gate 3370Sstevel@tonic-gate #define itc_conn itc_u.itcu_conn 3380Sstevel@tonic-gate 3390Sstevel@tonic-gate struct kmem_cache *ipcl_tcpconn_cache; 3400Sstevel@tonic-gate struct kmem_cache *ipcl_tcp_cache; 3410Sstevel@tonic-gate struct kmem_cache *ipcl_conn_cache; 3420Sstevel@tonic-gate extern struct kmem_cache *sctp_conn_cache; 3430Sstevel@tonic-gate extern struct kmem_cache *tcp_sack_info_cache; 3440Sstevel@tonic-gate extern struct kmem_cache *tcp_iphc_cache; 3450Sstevel@tonic-gate 3460Sstevel@tonic-gate extern void tcp_timermp_free(tcp_t *); 3470Sstevel@tonic-gate extern mblk_t *tcp_timermp_alloc(int); 3480Sstevel@tonic-gate 3490Sstevel@tonic-gate static int ipcl_tcpconn_constructor(void *, void *, int); 3500Sstevel@tonic-gate static void ipcl_tcpconn_destructor(void *, void *); 3510Sstevel@tonic-gate 3520Sstevel@tonic-gate static int conn_g_index; 3530Sstevel@tonic-gate connf_t *ipcl_globalhash_fanout; 3540Sstevel@tonic-gate 3550Sstevel@tonic-gate #ifdef IPCL_DEBUG 3560Sstevel@tonic-gate #define INET_NTOA_BUFSIZE 18 3570Sstevel@tonic-gate 3580Sstevel@tonic-gate static char * 3590Sstevel@tonic-gate inet_ntoa_r(uint32_t in, char *b) 3600Sstevel@tonic-gate { 3610Sstevel@tonic-gate unsigned char *p; 3620Sstevel@tonic-gate 3630Sstevel@tonic-gate p = (unsigned char *)∈ 3640Sstevel@tonic-gate (void) sprintf(b, "%d.%d.%d.%d", p[0], p[1], p[2], p[3]); 3650Sstevel@tonic-gate return (b); 3660Sstevel@tonic-gate } 3670Sstevel@tonic-gate #endif 3680Sstevel@tonic-gate 3690Sstevel@tonic-gate /* 3700Sstevel@tonic-gate * ipclassifier intialization routine, sets up hash tables and 3710Sstevel@tonic-gate * conn caches. 
3720Sstevel@tonic-gate */ 3730Sstevel@tonic-gate void 3740Sstevel@tonic-gate ipcl_init(void) 3750Sstevel@tonic-gate { 3760Sstevel@tonic-gate int i; 3770Sstevel@tonic-gate int sizes[] = P2Ps(); 3780Sstevel@tonic-gate 3790Sstevel@tonic-gate ipcl_conn_cache = kmem_cache_create("ipcl_conn_cache", 3800Sstevel@tonic-gate sizeof (conn_t), CACHE_ALIGN_SIZE, 381741Smasputra NULL, NULL, NULL, NULL, NULL, 0); 3820Sstevel@tonic-gate 3830Sstevel@tonic-gate ipcl_tcpconn_cache = kmem_cache_create("ipcl_tcpconn_cache", 3840Sstevel@tonic-gate sizeof (itc_t), CACHE_ALIGN_SIZE, 3850Sstevel@tonic-gate ipcl_tcpconn_constructor, ipcl_tcpconn_destructor, 3860Sstevel@tonic-gate NULL, NULL, NULL, 0); 3870Sstevel@tonic-gate 3880Sstevel@tonic-gate /* 3890Sstevel@tonic-gate * Calculate size of conn fanout table. 3900Sstevel@tonic-gate */ 3910Sstevel@tonic-gate if (ipcl_conn_hash_size != 0) { 3920Sstevel@tonic-gate ipcl_conn_fanout_size = ipcl_conn_hash_size; 3930Sstevel@tonic-gate } else if (tcp_conn_hash_size != 0) { 3940Sstevel@tonic-gate ipcl_conn_fanout_size = tcp_conn_hash_size; 3950Sstevel@tonic-gate } else { 3960Sstevel@tonic-gate extern pgcnt_t freemem; 3970Sstevel@tonic-gate 3980Sstevel@tonic-gate ipcl_conn_fanout_size = 3990Sstevel@tonic-gate (freemem * PAGESIZE) / ipcl_conn_hash_memfactor; 4000Sstevel@tonic-gate 4010Sstevel@tonic-gate if (ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) 4020Sstevel@tonic-gate ipcl_conn_fanout_size = ipcl_conn_hash_maxsize; 4030Sstevel@tonic-gate } 4040Sstevel@tonic-gate 4050Sstevel@tonic-gate for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) { 4060Sstevel@tonic-gate if (sizes[i] >= ipcl_conn_fanout_size) { 4070Sstevel@tonic-gate break; 4080Sstevel@tonic-gate } 4090Sstevel@tonic-gate } 4100Sstevel@tonic-gate if ((ipcl_conn_fanout_size = sizes[i]) == 0) { 4110Sstevel@tonic-gate /* Out of range, use the 2^16 value */ 4120Sstevel@tonic-gate ipcl_conn_fanout_size = sizes[16]; 4130Sstevel@tonic-gate } 4140Sstevel@tonic-gate ipcl_conn_fanout = 
(connf_t *)kmem_zalloc(ipcl_conn_fanout_size * 4150Sstevel@tonic-gate sizeof (*ipcl_conn_fanout), KM_SLEEP); 4160Sstevel@tonic-gate 4170Sstevel@tonic-gate for (i = 0; i < ipcl_conn_fanout_size; i++) { 4180Sstevel@tonic-gate mutex_init(&ipcl_conn_fanout[i].connf_lock, NULL, 4190Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4200Sstevel@tonic-gate } 4210Sstevel@tonic-gate 4220Sstevel@tonic-gate ipcl_bind_fanout = (connf_t *)kmem_zalloc(ipcl_bind_fanout_size * 4230Sstevel@tonic-gate sizeof (*ipcl_bind_fanout), KM_SLEEP); 4240Sstevel@tonic-gate 4250Sstevel@tonic-gate for (i = 0; i < ipcl_bind_fanout_size; i++) { 4260Sstevel@tonic-gate mutex_init(&ipcl_bind_fanout[i].connf_lock, NULL, 4270Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4280Sstevel@tonic-gate } 4290Sstevel@tonic-gate 4300Sstevel@tonic-gate for (i = 0; i < A_CNT(ipcl_proto_fanout); i++) { 4310Sstevel@tonic-gate mutex_init(&ipcl_proto_fanout[i].connf_lock, NULL, 4320Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4330Sstevel@tonic-gate } 4340Sstevel@tonic-gate for (i = 0; i < A_CNT(ipcl_proto_fanout_v6); i++) { 4350Sstevel@tonic-gate mutex_init(&ipcl_proto_fanout_v6[i].connf_lock, NULL, 4360Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4370Sstevel@tonic-gate } 4380Sstevel@tonic-gate 4390Sstevel@tonic-gate mutex_init(&rts_clients.connf_lock, NULL, MUTEX_DEFAULT, NULL); 4400Sstevel@tonic-gate 4410Sstevel@tonic-gate ipcl_udp_fanout = (connf_t *)kmem_zalloc(ipcl_udp_fanout_size * 4420Sstevel@tonic-gate sizeof (*ipcl_udp_fanout), KM_SLEEP); 4430Sstevel@tonic-gate 4440Sstevel@tonic-gate for (i = 0; i < ipcl_udp_fanout_size; i++) { 4450Sstevel@tonic-gate mutex_init(&ipcl_udp_fanout[i].connf_lock, NULL, 4460Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4470Sstevel@tonic-gate } 4480Sstevel@tonic-gate 4490Sstevel@tonic-gate ipcl_raw_fanout = (connf_t *)kmem_zalloc(ipcl_raw_fanout_size * 4500Sstevel@tonic-gate sizeof (*ipcl_raw_fanout), KM_SLEEP); 4510Sstevel@tonic-gate 4520Sstevel@tonic-gate for (i = 0; i < ipcl_raw_fanout_size; i++) { 
4530Sstevel@tonic-gate mutex_init(&ipcl_raw_fanout[i].connf_lock, NULL, 4540Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4550Sstevel@tonic-gate } 4560Sstevel@tonic-gate 4570Sstevel@tonic-gate ipcl_globalhash_fanout = (connf_t *)kmem_zalloc(sizeof (connf_t) * 4580Sstevel@tonic-gate CONN_G_HASH_SIZE, KM_SLEEP); 4590Sstevel@tonic-gate 4600Sstevel@tonic-gate for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4610Sstevel@tonic-gate mutex_init(&ipcl_globalhash_fanout[i].connf_lock, NULL, 4620Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4630Sstevel@tonic-gate } 4640Sstevel@tonic-gate } 4650Sstevel@tonic-gate 4660Sstevel@tonic-gate void 4670Sstevel@tonic-gate ipcl_destroy(void) 4680Sstevel@tonic-gate { 4690Sstevel@tonic-gate int i; 4700Sstevel@tonic-gate kmem_cache_destroy(ipcl_conn_cache); 4710Sstevel@tonic-gate kmem_cache_destroy(ipcl_tcpconn_cache); 4720Sstevel@tonic-gate for (i = 0; i < ipcl_conn_fanout_size; i++) 4730Sstevel@tonic-gate mutex_destroy(&ipcl_conn_fanout[i].connf_lock); 4740Sstevel@tonic-gate kmem_free(ipcl_conn_fanout, ipcl_conn_fanout_size * 4750Sstevel@tonic-gate sizeof (*ipcl_conn_fanout)); 4760Sstevel@tonic-gate for (i = 0; i < ipcl_bind_fanout_size; i++) 4770Sstevel@tonic-gate mutex_destroy(&ipcl_bind_fanout[i].connf_lock); 4780Sstevel@tonic-gate kmem_free(ipcl_bind_fanout, ipcl_bind_fanout_size * 4790Sstevel@tonic-gate sizeof (*ipcl_bind_fanout)); 4800Sstevel@tonic-gate 4810Sstevel@tonic-gate for (i = 0; i < A_CNT(ipcl_proto_fanout); i++) 4820Sstevel@tonic-gate mutex_destroy(&ipcl_proto_fanout[i].connf_lock); 4830Sstevel@tonic-gate for (i = 0; i < A_CNT(ipcl_proto_fanout_v6); i++) 4840Sstevel@tonic-gate mutex_destroy(&ipcl_proto_fanout_v6[i].connf_lock); 4850Sstevel@tonic-gate 4860Sstevel@tonic-gate for (i = 0; i < ipcl_udp_fanout_size; i++) 4870Sstevel@tonic-gate mutex_destroy(&ipcl_udp_fanout[i].connf_lock); 4880Sstevel@tonic-gate kmem_free(ipcl_udp_fanout, ipcl_udp_fanout_size * 4890Sstevel@tonic-gate sizeof (*ipcl_udp_fanout)); 4900Sstevel@tonic-gate 
4910Sstevel@tonic-gate for (i = 0; i < ipcl_raw_fanout_size; i++) 4920Sstevel@tonic-gate mutex_destroy(&ipcl_raw_fanout[i].connf_lock); 4930Sstevel@tonic-gate kmem_free(ipcl_raw_fanout, ipcl_raw_fanout_size * 4940Sstevel@tonic-gate sizeof (*ipcl_raw_fanout)); 4950Sstevel@tonic-gate 4960Sstevel@tonic-gate kmem_free(ipcl_globalhash_fanout, sizeof (connf_t) * CONN_G_HASH_SIZE); 4970Sstevel@tonic-gate mutex_destroy(&rts_clients.connf_lock); 4980Sstevel@tonic-gate } 4990Sstevel@tonic-gate 5000Sstevel@tonic-gate /* 5010Sstevel@tonic-gate * conn creation routine. initialize the conn, sets the reference 5020Sstevel@tonic-gate * and inserts it in the global hash table. 5030Sstevel@tonic-gate */ 5040Sstevel@tonic-gate conn_t * 5050Sstevel@tonic-gate ipcl_conn_create(uint32_t type, int sleep) 5060Sstevel@tonic-gate { 5070Sstevel@tonic-gate itc_t *itc; 5080Sstevel@tonic-gate conn_t *connp; 5090Sstevel@tonic-gate 5100Sstevel@tonic-gate switch (type) { 5110Sstevel@tonic-gate case IPCL_TCPCONN: 5120Sstevel@tonic-gate if ((itc = kmem_cache_alloc(ipcl_tcpconn_cache, 5130Sstevel@tonic-gate sleep)) == NULL) 5140Sstevel@tonic-gate return (NULL); 5150Sstevel@tonic-gate connp = &itc->itc_conn; 5160Sstevel@tonic-gate connp->conn_ref = 1; 5170Sstevel@tonic-gate IPCL_DEBUG_LVL(1, 5180Sstevel@tonic-gate ("ipcl_conn_create: connp = %p tcp (%p)", 5190Sstevel@tonic-gate (void *)connp, (void *)connp->conn_tcp)); 5200Sstevel@tonic-gate ipcl_globalhash_insert(connp); 5210Sstevel@tonic-gate break; 5220Sstevel@tonic-gate case IPCL_SCTPCONN: 5230Sstevel@tonic-gate if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL) 5240Sstevel@tonic-gate return (NULL); 5250Sstevel@tonic-gate connp->conn_flags = IPCL_SCTPCONN; 5260Sstevel@tonic-gate break; 5270Sstevel@tonic-gate case IPCL_IPCCONN: 5280Sstevel@tonic-gate connp = kmem_cache_alloc(ipcl_conn_cache, sleep); 5290Sstevel@tonic-gate if (connp == NULL) 530741Smasputra return (NULL); 5310Sstevel@tonic-gate bzero(connp, sizeof (conn_t)); 
532741Smasputra mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 5330Sstevel@tonic-gate cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 534741Smasputra connp->conn_flags = IPCL_IPCCONN; 5350Sstevel@tonic-gate connp->conn_ref = 1; 5360Sstevel@tonic-gate IPCL_DEBUG_LVL(1, 5370Sstevel@tonic-gate ("ipcl_conn_create: connp = %p\n", (void *)connp)); 5380Sstevel@tonic-gate ipcl_globalhash_insert(connp); 5390Sstevel@tonic-gate break; 540741Smasputra default: 541741Smasputra connp = NULL; 542741Smasputra ASSERT(0); 5430Sstevel@tonic-gate } 5440Sstevel@tonic-gate 5450Sstevel@tonic-gate return (connp); 5460Sstevel@tonic-gate } 5470Sstevel@tonic-gate 5480Sstevel@tonic-gate void 5490Sstevel@tonic-gate ipcl_conn_destroy(conn_t *connp) 5500Sstevel@tonic-gate { 5510Sstevel@tonic-gate mblk_t *mp; 5520Sstevel@tonic-gate 5530Sstevel@tonic-gate ASSERT(!MUTEX_HELD(&connp->conn_lock)); 5540Sstevel@tonic-gate ASSERT(connp->conn_ref == 0); 5550Sstevel@tonic-gate ASSERT(connp->conn_ire_cache == NULL); 5560Sstevel@tonic-gate 557*1676Sjpk if (connp->conn_peercred != NULL && 558*1676Sjpk connp->conn_peercred != connp->conn_cred) 559*1676Sjpk crfree(connp->conn_peercred); 560*1676Sjpk connp->conn_peercred = NULL; 561*1676Sjpk 562*1676Sjpk if (connp->conn_cred != NULL) { 563*1676Sjpk crfree(connp->conn_cred); 564*1676Sjpk connp->conn_cred = NULL; 565*1676Sjpk } 566*1676Sjpk 5670Sstevel@tonic-gate ipcl_globalhash_remove(connp); 5680Sstevel@tonic-gate 5690Sstevel@tonic-gate cv_destroy(&connp->conn_cv); 5700Sstevel@tonic-gate if (connp->conn_flags & IPCL_TCPCONN) { 571741Smasputra tcp_t *tcp = connp->conn_tcp; 572741Smasputra 5730Sstevel@tonic-gate mutex_destroy(&connp->conn_lock); 5740Sstevel@tonic-gate ASSERT(connp->conn_tcp != NULL); 5750Sstevel@tonic-gate tcp_free(tcp); 5760Sstevel@tonic-gate mp = tcp->tcp_timercache; 577*1676Sjpk tcp->tcp_cred = NULL; 5780Sstevel@tonic-gate 5790Sstevel@tonic-gate if (tcp->tcp_sack_info != NULL) { 5800Sstevel@tonic-gate bzero(tcp->tcp_sack_info, 
sizeof (tcp_sack_info_t)); 5810Sstevel@tonic-gate kmem_cache_free(tcp_sack_info_cache, 5820Sstevel@tonic-gate tcp->tcp_sack_info); 5830Sstevel@tonic-gate } 5840Sstevel@tonic-gate if (tcp->tcp_iphc != NULL) { 5850Sstevel@tonic-gate if (tcp->tcp_hdr_grown) { 5860Sstevel@tonic-gate kmem_free(tcp->tcp_iphc, tcp->tcp_iphc_len); 5870Sstevel@tonic-gate } else { 5880Sstevel@tonic-gate bzero(tcp->tcp_iphc, tcp->tcp_iphc_len); 5890Sstevel@tonic-gate kmem_cache_free(tcp_iphc_cache, tcp->tcp_iphc); 5900Sstevel@tonic-gate } 5910Sstevel@tonic-gate tcp->tcp_iphc_len = 0; 5920Sstevel@tonic-gate } 5930Sstevel@tonic-gate ASSERT(tcp->tcp_iphc_len == 0); 5940Sstevel@tonic-gate 5950Sstevel@tonic-gate if (connp->conn_latch != NULL) 5960Sstevel@tonic-gate IPLATCH_REFRELE(connp->conn_latch); 5970Sstevel@tonic-gate if (connp->conn_policy != NULL) 5980Sstevel@tonic-gate IPPH_REFRELE(connp->conn_policy); 5990Sstevel@tonic-gate bzero(connp, sizeof (itc_t)); 6000Sstevel@tonic-gate 6010Sstevel@tonic-gate tcp->tcp_timercache = mp; 6020Sstevel@tonic-gate connp->conn_tcp = tcp; 6030Sstevel@tonic-gate connp->conn_flags = IPCL_TCPCONN; 6040Sstevel@tonic-gate connp->conn_ulp = IPPROTO_TCP; 6050Sstevel@tonic-gate tcp->tcp_connp = connp; 6060Sstevel@tonic-gate kmem_cache_free(ipcl_tcpconn_cache, connp); 6070Sstevel@tonic-gate } else if (connp->conn_flags & IPCL_SCTPCONN) { 6080Sstevel@tonic-gate sctp_free(connp); 6090Sstevel@tonic-gate } else { 610741Smasputra ASSERT(connp->conn_udp == NULL); 6110Sstevel@tonic-gate mutex_destroy(&connp->conn_lock); 6120Sstevel@tonic-gate kmem_cache_free(ipcl_conn_cache, connp); 6130Sstevel@tonic-gate } 6140Sstevel@tonic-gate } 6150Sstevel@tonic-gate 6160Sstevel@tonic-gate /* 6170Sstevel@tonic-gate * Running in cluster mode - deregister listener information 6180Sstevel@tonic-gate */ 6190Sstevel@tonic-gate 6200Sstevel@tonic-gate static void 6210Sstevel@tonic-gate ipcl_conn_unlisten(conn_t *connp) 6220Sstevel@tonic-gate { 6230Sstevel@tonic-gate ASSERT((connp->conn_flags & 
IPCL_CL_LISTENER) != 0); 6240Sstevel@tonic-gate ASSERT(connp->conn_lport != 0); 6250Sstevel@tonic-gate 6260Sstevel@tonic-gate if (cl_inet_unlisten != NULL) { 6270Sstevel@tonic-gate sa_family_t addr_family; 6280Sstevel@tonic-gate uint8_t *laddrp; 6290Sstevel@tonic-gate 6300Sstevel@tonic-gate if (connp->conn_pkt_isv6) { 6310Sstevel@tonic-gate addr_family = AF_INET6; 6320Sstevel@tonic-gate laddrp = (uint8_t *)&connp->conn_bound_source_v6; 6330Sstevel@tonic-gate } else { 6340Sstevel@tonic-gate addr_family = AF_INET; 6350Sstevel@tonic-gate laddrp = (uint8_t *)&connp->conn_bound_source; 6360Sstevel@tonic-gate } 6370Sstevel@tonic-gate (*cl_inet_unlisten)(IPPROTO_TCP, addr_family, laddrp, 6380Sstevel@tonic-gate connp->conn_lport); 6390Sstevel@tonic-gate } 6400Sstevel@tonic-gate connp->conn_flags &= ~IPCL_CL_LISTENER; 6410Sstevel@tonic-gate } 6420Sstevel@tonic-gate 6430Sstevel@tonic-gate /* 6440Sstevel@tonic-gate * We set the IPCL_REMOVED flag (instead of clearing the flag indicating 6450Sstevel@tonic-gate * which table the conn belonged to). So for debugging we can see which hash 6460Sstevel@tonic-gate * table this connection was in. 
6470Sstevel@tonic-gate */ 6480Sstevel@tonic-gate #define IPCL_HASH_REMOVE(connp) { \ 6490Sstevel@tonic-gate connf_t *connfp = (connp)->conn_fanout; \ 6500Sstevel@tonic-gate ASSERT(!MUTEX_HELD(&((connp)->conn_lock))); \ 6510Sstevel@tonic-gate if (connfp != NULL) { \ 6520Sstevel@tonic-gate IPCL_DEBUG_LVL(4, ("IPCL_HASH_REMOVE: connp %p", \ 6530Sstevel@tonic-gate (void *)(connp))); \ 6540Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); \ 6550Sstevel@tonic-gate if ((connp)->conn_next != NULL) \ 6560Sstevel@tonic-gate (connp)->conn_next->conn_prev = \ 6570Sstevel@tonic-gate (connp)->conn_prev; \ 6580Sstevel@tonic-gate if ((connp)->conn_prev != NULL) \ 6590Sstevel@tonic-gate (connp)->conn_prev->conn_next = \ 6600Sstevel@tonic-gate (connp)->conn_next; \ 6610Sstevel@tonic-gate else \ 6620Sstevel@tonic-gate connfp->connf_head = (connp)->conn_next; \ 6630Sstevel@tonic-gate (connp)->conn_fanout = NULL; \ 6640Sstevel@tonic-gate (connp)->conn_next = NULL; \ 6650Sstevel@tonic-gate (connp)->conn_prev = NULL; \ 6660Sstevel@tonic-gate (connp)->conn_flags |= IPCL_REMOVED; \ 6670Sstevel@tonic-gate if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0) \ 6680Sstevel@tonic-gate ipcl_conn_unlisten((connp)); \ 6690Sstevel@tonic-gate CONN_DEC_REF((connp)); \ 6700Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); \ 6710Sstevel@tonic-gate } \ 6720Sstevel@tonic-gate } 6730Sstevel@tonic-gate 6740Sstevel@tonic-gate void 6750Sstevel@tonic-gate ipcl_hash_remove(conn_t *connp) 6760Sstevel@tonic-gate { 6770Sstevel@tonic-gate IPCL_HASH_REMOVE(connp); 6780Sstevel@tonic-gate } 6790Sstevel@tonic-gate 6800Sstevel@tonic-gate /* 6810Sstevel@tonic-gate * The whole purpose of this function is allow removal of 6820Sstevel@tonic-gate * a conn_t from the connected hash for timewait reclaim. 
6830Sstevel@tonic-gate * This is essentially a TW reclaim fastpath where timewait 6840Sstevel@tonic-gate * collector checks under fanout lock (so no one else can 6850Sstevel@tonic-gate * get access to the conn_t) that refcnt is 2 i.e. one for 6860Sstevel@tonic-gate * TCP and one for the classifier hash list. If ref count 6870Sstevel@tonic-gate * is indeed 2, we can just remove the conn under lock and 6880Sstevel@tonic-gate * avoid cleaning up the conn under squeue. This gives us 6890Sstevel@tonic-gate * improved performance. 6900Sstevel@tonic-gate */ 6910Sstevel@tonic-gate void 6920Sstevel@tonic-gate ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp) 6930Sstevel@tonic-gate { 6940Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connfp->connf_lock)); 6950Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock)); 6960Sstevel@tonic-gate ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0); 6970Sstevel@tonic-gate 6980Sstevel@tonic-gate if ((connp)->conn_next != NULL) { 6990Sstevel@tonic-gate (connp)->conn_next->conn_prev = 7000Sstevel@tonic-gate (connp)->conn_prev; 7010Sstevel@tonic-gate } 7020Sstevel@tonic-gate if ((connp)->conn_prev != NULL) { 7030Sstevel@tonic-gate (connp)->conn_prev->conn_next = 7040Sstevel@tonic-gate (connp)->conn_next; 7050Sstevel@tonic-gate } else { 7060Sstevel@tonic-gate connfp->connf_head = (connp)->conn_next; 7070Sstevel@tonic-gate } 7080Sstevel@tonic-gate (connp)->conn_fanout = NULL; 7090Sstevel@tonic-gate (connp)->conn_next = NULL; 7100Sstevel@tonic-gate (connp)->conn_prev = NULL; 7110Sstevel@tonic-gate (connp)->conn_flags |= IPCL_REMOVED; 7120Sstevel@tonic-gate ASSERT((connp)->conn_ref == 2); 7130Sstevel@tonic-gate (connp)->conn_ref--; 7140Sstevel@tonic-gate } 7150Sstevel@tonic-gate 7160Sstevel@tonic-gate #define IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) { \ 7170Sstevel@tonic-gate ASSERT((connp)->conn_fanout == NULL); \ 7180Sstevel@tonic-gate ASSERT((connp)->conn_next == NULL); \ 7190Sstevel@tonic-gate ASSERT((connp)->conn_prev == NULL); \ 
7200Sstevel@tonic-gate if ((connfp)->connf_head != NULL) { \ 7210Sstevel@tonic-gate (connfp)->connf_head->conn_prev = (connp); \ 7220Sstevel@tonic-gate (connp)->conn_next = (connfp)->connf_head; \ 7230Sstevel@tonic-gate } \ 7240Sstevel@tonic-gate (connp)->conn_fanout = (connfp); \ 7250Sstevel@tonic-gate (connfp)->connf_head = (connp); \ 7260Sstevel@tonic-gate (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 7270Sstevel@tonic-gate IPCL_CONNECTED; \ 7280Sstevel@tonic-gate CONN_INC_REF(connp); \ 7290Sstevel@tonic-gate } 7300Sstevel@tonic-gate 7310Sstevel@tonic-gate #define IPCL_HASH_INSERT_CONNECTED(connfp, connp) { \ 7320Sstevel@tonic-gate IPCL_DEBUG_LVL(8, ("IPCL_HASH_INSERT_CONNECTED: connfp %p " \ 7330Sstevel@tonic-gate "connp %p", (void *)(connfp), (void *)(connp))); \ 7340Sstevel@tonic-gate IPCL_HASH_REMOVE((connp)); \ 7350Sstevel@tonic-gate mutex_enter(&(connfp)->connf_lock); \ 7360Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); \ 7370Sstevel@tonic-gate mutex_exit(&(connfp)->connf_lock); \ 7380Sstevel@tonic-gate } 7390Sstevel@tonic-gate 7400Sstevel@tonic-gate #define IPCL_HASH_INSERT_BOUND(connfp, connp) { \ 7410Sstevel@tonic-gate conn_t *pconnp = NULL, *nconnp; \ 7420Sstevel@tonic-gate IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_BOUND: connfp %p " \ 7430Sstevel@tonic-gate "connp %p", (void *)connfp, (void *)(connp))); \ 7440Sstevel@tonic-gate IPCL_HASH_REMOVE((connp)); \ 7450Sstevel@tonic-gate mutex_enter(&(connfp)->connf_lock); \ 7460Sstevel@tonic-gate nconnp = (connfp)->connf_head; \ 747153Sethindra while (nconnp != NULL && \ 748153Sethindra !_IPCL_V4_MATCH_ANY(nconnp->conn_srcv6)) { \ 749153Sethindra pconnp = nconnp; \ 750153Sethindra nconnp = nconnp->conn_next; \ 7510Sstevel@tonic-gate } \ 7520Sstevel@tonic-gate if (pconnp != NULL) { \ 7530Sstevel@tonic-gate pconnp->conn_next = (connp); \ 7540Sstevel@tonic-gate (connp)->conn_prev = pconnp; \ 7550Sstevel@tonic-gate } else { \ 7560Sstevel@tonic-gate (connfp)->connf_head = 
(connp); \ 7570Sstevel@tonic-gate } \ 7580Sstevel@tonic-gate if (nconnp != NULL) { \ 7590Sstevel@tonic-gate (connp)->conn_next = nconnp; \ 7600Sstevel@tonic-gate nconnp->conn_prev = (connp); \ 7610Sstevel@tonic-gate } \ 7620Sstevel@tonic-gate (connp)->conn_fanout = (connfp); \ 7630Sstevel@tonic-gate (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 7640Sstevel@tonic-gate IPCL_BOUND; \ 7650Sstevel@tonic-gate CONN_INC_REF(connp); \ 7660Sstevel@tonic-gate mutex_exit(&(connfp)->connf_lock); \ 7670Sstevel@tonic-gate } 7680Sstevel@tonic-gate 7690Sstevel@tonic-gate #define IPCL_HASH_INSERT_WILDCARD(connfp, connp) { \ 7700Sstevel@tonic-gate conn_t **list, *prev, *next; \ 7710Sstevel@tonic-gate boolean_t isv4mapped = \ 7720Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED(&(connp)->conn_srcv6); \ 7730Sstevel@tonic-gate IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_WILDCARD: connfp %p " \ 7740Sstevel@tonic-gate "connp %p", (void *)(connfp), (void *)(connp))); \ 7750Sstevel@tonic-gate IPCL_HASH_REMOVE((connp)); \ 7760Sstevel@tonic-gate mutex_enter(&(connfp)->connf_lock); \ 7770Sstevel@tonic-gate list = &(connfp)->connf_head; \ 7780Sstevel@tonic-gate prev = NULL; \ 7790Sstevel@tonic-gate while ((next = *list) != NULL) { \ 7800Sstevel@tonic-gate if (isv4mapped && \ 7810Sstevel@tonic-gate IN6_IS_ADDR_UNSPECIFIED(&next->conn_srcv6) && \ 7820Sstevel@tonic-gate connp->conn_zoneid == next->conn_zoneid) { \ 7830Sstevel@tonic-gate (connp)->conn_next = next; \ 7840Sstevel@tonic-gate if (prev != NULL) \ 7850Sstevel@tonic-gate prev = next->conn_prev; \ 7860Sstevel@tonic-gate next->conn_prev = (connp); \ 7870Sstevel@tonic-gate break; \ 7880Sstevel@tonic-gate } \ 7890Sstevel@tonic-gate list = &next->conn_next; \ 7900Sstevel@tonic-gate prev = next; \ 7910Sstevel@tonic-gate } \ 7920Sstevel@tonic-gate (connp)->conn_prev = prev; \ 7930Sstevel@tonic-gate *list = (connp); \ 7940Sstevel@tonic-gate (connp)->conn_fanout = (connfp); \ 7950Sstevel@tonic-gate (connp)->conn_flags = ((connp)->conn_flags & 
~IPCL_REMOVED) | \ 7960Sstevel@tonic-gate IPCL_BOUND; \ 7970Sstevel@tonic-gate CONN_INC_REF((connp)); \ 7980Sstevel@tonic-gate mutex_exit(&(connfp)->connf_lock); \ 7990Sstevel@tonic-gate } 8000Sstevel@tonic-gate 8010Sstevel@tonic-gate void 8020Sstevel@tonic-gate ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp) 8030Sstevel@tonic-gate { 804*1676Sjpk ASSERT(!connp->conn_mac_exempt); 8050Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 8060Sstevel@tonic-gate } 8070Sstevel@tonic-gate 8080Sstevel@tonic-gate void 8090Sstevel@tonic-gate ipcl_proto_insert(conn_t *connp, uint8_t protocol) 8100Sstevel@tonic-gate { 8110Sstevel@tonic-gate connf_t *connfp; 8120Sstevel@tonic-gate 8130Sstevel@tonic-gate ASSERT(connp != NULL); 814*1676Sjpk ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH || 815*1676Sjpk protocol == IPPROTO_ESP); 8160Sstevel@tonic-gate 8170Sstevel@tonic-gate connp->conn_ulp = protocol; 8180Sstevel@tonic-gate 8190Sstevel@tonic-gate /* Insert it in the protocol hash */ 8200Sstevel@tonic-gate connfp = &ipcl_proto_fanout[protocol]; 8210Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 8220Sstevel@tonic-gate } 8230Sstevel@tonic-gate 8240Sstevel@tonic-gate void 8250Sstevel@tonic-gate ipcl_proto_insert_v6(conn_t *connp, uint8_t protocol) 8260Sstevel@tonic-gate { 8270Sstevel@tonic-gate connf_t *connfp; 8280Sstevel@tonic-gate 8290Sstevel@tonic-gate ASSERT(connp != NULL); 830*1676Sjpk ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH || 831*1676Sjpk protocol == IPPROTO_ESP); 8320Sstevel@tonic-gate 8330Sstevel@tonic-gate connp->conn_ulp = protocol; 8340Sstevel@tonic-gate 8350Sstevel@tonic-gate /* Insert it in the Bind Hash */ 8360Sstevel@tonic-gate connfp = &ipcl_proto_fanout_v6[protocol]; 8370Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 8380Sstevel@tonic-gate } 8390Sstevel@tonic-gate 8400Sstevel@tonic-gate /* 8410Sstevel@tonic-gate * This function is used only for inserting SCTP raw socket now. 
8420Sstevel@tonic-gate * This may change later. 8430Sstevel@tonic-gate * 8440Sstevel@tonic-gate * Note that only one raw socket can be bound to a port. The param 8450Sstevel@tonic-gate * lport is in network byte order. 8460Sstevel@tonic-gate */ 8470Sstevel@tonic-gate static int 8480Sstevel@tonic-gate ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport) 8490Sstevel@tonic-gate { 8500Sstevel@tonic-gate connf_t *connfp; 8510Sstevel@tonic-gate conn_t *oconnp; 8520Sstevel@tonic-gate 8530Sstevel@tonic-gate connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport))]; 8540Sstevel@tonic-gate 8550Sstevel@tonic-gate /* Check for existing raw socket already bound to the port. */ 8560Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 8570Sstevel@tonic-gate for (oconnp = connfp->connf_head; oconnp != NULL; 858409Skcpoon oconnp = oconnp->conn_next) { 8590Sstevel@tonic-gate if (oconnp->conn_lport == lport && 8600Sstevel@tonic-gate oconnp->conn_zoneid == connp->conn_zoneid && 8610Sstevel@tonic-gate oconnp->conn_af_isv6 == connp->conn_af_isv6 && 8620Sstevel@tonic-gate ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) || 8630Sstevel@tonic-gate IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_srcv6) || 8640Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6) || 8650Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_srcv6)) || 8660Sstevel@tonic-gate IN6_ARE_ADDR_EQUAL(&oconnp->conn_srcv6, 8670Sstevel@tonic-gate &connp->conn_srcv6))) { 8680Sstevel@tonic-gate break; 8690Sstevel@tonic-gate } 8700Sstevel@tonic-gate } 8710Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 8720Sstevel@tonic-gate if (oconnp != NULL) 8730Sstevel@tonic-gate return (EADDRNOTAVAIL); 8740Sstevel@tonic-gate 8750Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) || 8760Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_remv6)) { 8770Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) || 8780Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6)) { 8790Sstevel@tonic-gate 
IPCL_HASH_INSERT_WILDCARD(connfp, connp); 8800Sstevel@tonic-gate } else { 8810Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 8820Sstevel@tonic-gate } 8830Sstevel@tonic-gate } else { 8840Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 8850Sstevel@tonic-gate } 8860Sstevel@tonic-gate return (0); 8870Sstevel@tonic-gate } 8880Sstevel@tonic-gate 8890Sstevel@tonic-gate /* 890*1676Sjpk * Check for a MAC exemption conflict on a labeled system. Note that for 891*1676Sjpk * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the 892*1676Sjpk * transport layer. This check is for binding all other protocols. 893*1676Sjpk * 894*1676Sjpk * Returns true if there's a conflict. 895*1676Sjpk */ 896*1676Sjpk static boolean_t 897*1676Sjpk check_exempt_conflict_v4(conn_t *connp) 898*1676Sjpk { 899*1676Sjpk connf_t *connfp; 900*1676Sjpk conn_t *tconn; 901*1676Sjpk 902*1676Sjpk connfp = &ipcl_proto_fanout[connp->conn_ulp]; 903*1676Sjpk mutex_enter(&connfp->connf_lock); 904*1676Sjpk for (tconn = connfp->connf_head; tconn != NULL; 905*1676Sjpk tconn = tconn->conn_next) { 906*1676Sjpk /* We don't allow v4 fallback for v6 raw socket */ 907*1676Sjpk if (connp->conn_af_isv6 != tconn->conn_af_isv6) 908*1676Sjpk continue; 909*1676Sjpk /* If neither is exempt, then there's no conflict */ 910*1676Sjpk if (!connp->conn_mac_exempt && !tconn->conn_mac_exempt) 911*1676Sjpk continue; 912*1676Sjpk /* If both are bound to different specific addrs, ok */ 913*1676Sjpk if (connp->conn_src != INADDR_ANY && 914*1676Sjpk tconn->conn_src != INADDR_ANY && 915*1676Sjpk connp->conn_src != tconn->conn_src) 916*1676Sjpk continue; 917*1676Sjpk /* These two conflict; fail */ 918*1676Sjpk break; 919*1676Sjpk } 920*1676Sjpk mutex_exit(&connfp->connf_lock); 921*1676Sjpk return (tconn != NULL); 922*1676Sjpk } 923*1676Sjpk 924*1676Sjpk static boolean_t 925*1676Sjpk check_exempt_conflict_v6(conn_t *connp) 926*1676Sjpk { 927*1676Sjpk connf_t *connfp; 928*1676Sjpk conn_t *tconn; 
929*1676Sjpk 930*1676Sjpk connfp = &ipcl_proto_fanout[connp->conn_ulp]; 931*1676Sjpk mutex_enter(&connfp->connf_lock); 932*1676Sjpk for (tconn = connfp->connf_head; tconn != NULL; 933*1676Sjpk tconn = tconn->conn_next) { 934*1676Sjpk /* We don't allow v4 fallback for v6 raw socket */ 935*1676Sjpk if (connp->conn_af_isv6 != tconn->conn_af_isv6) 936*1676Sjpk continue; 937*1676Sjpk /* If neither is exempt, then there's no conflict */ 938*1676Sjpk if (!connp->conn_mac_exempt && !tconn->conn_mac_exempt) 939*1676Sjpk continue; 940*1676Sjpk /* If both are bound to different addrs, ok */ 941*1676Sjpk if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) && 942*1676Sjpk !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_srcv6) && 943*1676Sjpk !IN6_ARE_ADDR_EQUAL(&connp->conn_srcv6, &tconn->conn_srcv6)) 944*1676Sjpk continue; 945*1676Sjpk /* These two conflict; fail */ 946*1676Sjpk break; 947*1676Sjpk } 948*1676Sjpk mutex_exit(&connfp->connf_lock); 949*1676Sjpk return (tconn != NULL); 950*1676Sjpk } 951*1676Sjpk 952*1676Sjpk /* 9530Sstevel@tonic-gate * (v4, v6) bind hash insertion routines 9540Sstevel@tonic-gate */ 9550Sstevel@tonic-gate int 9560Sstevel@tonic-gate ipcl_bind_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, uint16_t lport) 9570Sstevel@tonic-gate { 9580Sstevel@tonic-gate connf_t *connfp; 9590Sstevel@tonic-gate #ifdef IPCL_DEBUG 9600Sstevel@tonic-gate char buf[INET_NTOA_BUFSIZE]; 9610Sstevel@tonic-gate #endif 9620Sstevel@tonic-gate int ret = 0; 9630Sstevel@tonic-gate 9640Sstevel@tonic-gate ASSERT(connp); 9650Sstevel@tonic-gate 9660Sstevel@tonic-gate IPCL_DEBUG_LVL(64, ("ipcl_bind_insert: connp %p, src = %s, " 9670Sstevel@tonic-gate "port = %d\n", (void *)connp, inet_ntoa_r(src, buf), lport)); 9680Sstevel@tonic-gate 9690Sstevel@tonic-gate connp->conn_ulp = protocol; 9700Sstevel@tonic-gate IN6_IPADDR_TO_V4MAPPED(src, &connp->conn_srcv6); 9710Sstevel@tonic-gate connp->conn_lport = lport; 9720Sstevel@tonic-gate 9730Sstevel@tonic-gate switch (protocol) { 974*1676Sjpk default: 
975*1676Sjpk if (is_system_labeled() && check_exempt_conflict_v4(connp)) 976*1676Sjpk return (EADDRINUSE); 977*1676Sjpk /* FALLTHROUGH */ 9780Sstevel@tonic-gate case IPPROTO_UDP: 9790Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 9800Sstevel@tonic-gate IPCL_DEBUG_LVL(64, 9810Sstevel@tonic-gate ("ipcl_bind_insert: connp %p - udp\n", 9820Sstevel@tonic-gate (void *)connp)); 9830Sstevel@tonic-gate connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; 9840Sstevel@tonic-gate } else { 9850Sstevel@tonic-gate IPCL_DEBUG_LVL(64, 9860Sstevel@tonic-gate ("ipcl_bind_insert: connp %p - protocol\n", 9870Sstevel@tonic-gate (void *)connp)); 9880Sstevel@tonic-gate connfp = &ipcl_proto_fanout[protocol]; 9890Sstevel@tonic-gate } 9900Sstevel@tonic-gate 9910Sstevel@tonic-gate if (connp->conn_rem != INADDR_ANY) { 9920Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 9930Sstevel@tonic-gate } else if (connp->conn_src != INADDR_ANY) { 9940Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 9950Sstevel@tonic-gate } else { 9960Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 9970Sstevel@tonic-gate } 9980Sstevel@tonic-gate break; 9990Sstevel@tonic-gate 10000Sstevel@tonic-gate case IPPROTO_TCP: 10010Sstevel@tonic-gate 10020Sstevel@tonic-gate /* Insert it in the Bind Hash */ 1003*1676Sjpk ASSERT(connp->conn_zoneid != ALL_ZONES); 10040Sstevel@tonic-gate connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 10050Sstevel@tonic-gate if (connp->conn_src != INADDR_ANY) { 10060Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 10070Sstevel@tonic-gate } else { 10080Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 10090Sstevel@tonic-gate } 10100Sstevel@tonic-gate if (cl_inet_listen != NULL) { 10110Sstevel@tonic-gate ASSERT(!connp->conn_pkt_isv6); 10120Sstevel@tonic-gate connp->conn_flags |= IPCL_CL_LISTENER; 10130Sstevel@tonic-gate (*cl_inet_listen)(IPPROTO_TCP, AF_INET, 10140Sstevel@tonic-gate (uint8_t *)&connp->conn_bound_source, lport); 
10150Sstevel@tonic-gate } 10160Sstevel@tonic-gate break; 10170Sstevel@tonic-gate 10180Sstevel@tonic-gate case IPPROTO_SCTP: 10190Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 10200Sstevel@tonic-gate break; 10210Sstevel@tonic-gate } 10220Sstevel@tonic-gate 10230Sstevel@tonic-gate return (ret); 10240Sstevel@tonic-gate } 10250Sstevel@tonic-gate 10260Sstevel@tonic-gate int 10270Sstevel@tonic-gate ipcl_bind_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, 10280Sstevel@tonic-gate uint16_t lport) 10290Sstevel@tonic-gate { 10300Sstevel@tonic-gate connf_t *connfp; 10310Sstevel@tonic-gate int ret = 0; 10320Sstevel@tonic-gate 10330Sstevel@tonic-gate ASSERT(connp); 10340Sstevel@tonic-gate 10350Sstevel@tonic-gate connp->conn_ulp = protocol; 10360Sstevel@tonic-gate connp->conn_srcv6 = *src; 10370Sstevel@tonic-gate connp->conn_lport = lport; 10380Sstevel@tonic-gate 10390Sstevel@tonic-gate switch (protocol) { 1040*1676Sjpk default: 1041*1676Sjpk if (is_system_labeled() && check_exempt_conflict_v6(connp)) 1042*1676Sjpk return (EADDRINUSE); 1043*1676Sjpk /* FALLTHROUGH */ 10440Sstevel@tonic-gate case IPPROTO_UDP: 10450Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 10460Sstevel@tonic-gate IPCL_DEBUG_LVL(128, 10470Sstevel@tonic-gate ("ipcl_bind_insert_v6: connp %p - udp\n", 10480Sstevel@tonic-gate (void *)connp)); 10490Sstevel@tonic-gate connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; 10500Sstevel@tonic-gate } else { 10510Sstevel@tonic-gate IPCL_DEBUG_LVL(128, 10520Sstevel@tonic-gate ("ipcl_bind_insert_v6: connp %p - protocol\n", 10530Sstevel@tonic-gate (void *)connp)); 10540Sstevel@tonic-gate connfp = &ipcl_proto_fanout_v6[protocol]; 10550Sstevel@tonic-gate } 10560Sstevel@tonic-gate 10570Sstevel@tonic-gate if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) { 10580Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 10590Sstevel@tonic-gate } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 10600Sstevel@tonic-gate 
IPCL_HASH_INSERT_BOUND(connfp, connp); 10610Sstevel@tonic-gate } else { 10620Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 10630Sstevel@tonic-gate } 10640Sstevel@tonic-gate break; 10650Sstevel@tonic-gate 10660Sstevel@tonic-gate case IPPROTO_TCP: 10670Sstevel@tonic-gate /* XXX - Need a separate table for IN6_IS_ADDR_UNSPECIFIED? */ 10680Sstevel@tonic-gate 10690Sstevel@tonic-gate /* Insert it in the Bind Hash */ 1070*1676Sjpk ASSERT(connp->conn_zoneid != ALL_ZONES); 10710Sstevel@tonic-gate connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 10720Sstevel@tonic-gate if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 10730Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 10740Sstevel@tonic-gate } else { 10750Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 10760Sstevel@tonic-gate } 10770Sstevel@tonic-gate if (cl_inet_listen != NULL) { 10780Sstevel@tonic-gate sa_family_t addr_family; 10790Sstevel@tonic-gate uint8_t *laddrp; 10800Sstevel@tonic-gate 10810Sstevel@tonic-gate if (connp->conn_pkt_isv6) { 10820Sstevel@tonic-gate addr_family = AF_INET6; 10830Sstevel@tonic-gate laddrp = 10840Sstevel@tonic-gate (uint8_t *)&connp->conn_bound_source_v6; 10850Sstevel@tonic-gate } else { 10860Sstevel@tonic-gate addr_family = AF_INET; 10870Sstevel@tonic-gate laddrp = (uint8_t *)&connp->conn_bound_source; 10880Sstevel@tonic-gate } 10890Sstevel@tonic-gate connp->conn_flags |= IPCL_CL_LISTENER; 10900Sstevel@tonic-gate (*cl_inet_listen)(IPPROTO_TCP, addr_family, laddrp, 10910Sstevel@tonic-gate lport); 10920Sstevel@tonic-gate } 10930Sstevel@tonic-gate break; 10940Sstevel@tonic-gate 10950Sstevel@tonic-gate case IPPROTO_SCTP: 10960Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 10970Sstevel@tonic-gate break; 10980Sstevel@tonic-gate } 10990Sstevel@tonic-gate 11000Sstevel@tonic-gate return (ret); 11010Sstevel@tonic-gate } 11020Sstevel@tonic-gate 11030Sstevel@tonic-gate /* 11040Sstevel@tonic-gate * ipcl_conn_hash insertion routines. 
11050Sstevel@tonic-gate */ 11060Sstevel@tonic-gate int 11070Sstevel@tonic-gate ipcl_conn_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, 11080Sstevel@tonic-gate ipaddr_t rem, uint32_t ports) 11090Sstevel@tonic-gate { 11100Sstevel@tonic-gate connf_t *connfp; 11110Sstevel@tonic-gate uint16_t *up; 11120Sstevel@tonic-gate conn_t *tconnp; 11130Sstevel@tonic-gate #ifdef IPCL_DEBUG 11140Sstevel@tonic-gate char sbuf[INET_NTOA_BUFSIZE], rbuf[INET_NTOA_BUFSIZE]; 11150Sstevel@tonic-gate #endif 11160Sstevel@tonic-gate in_port_t lport; 11170Sstevel@tonic-gate int ret = 0; 11180Sstevel@tonic-gate 11190Sstevel@tonic-gate IPCL_DEBUG_LVL(256, ("ipcl_conn_insert: connp %p, src = %s, " 11200Sstevel@tonic-gate "dst = %s, ports = %x, protocol = %x", (void *)connp, 11210Sstevel@tonic-gate inet_ntoa_r(src, sbuf), inet_ntoa_r(rem, rbuf), 11220Sstevel@tonic-gate ports, protocol)); 11230Sstevel@tonic-gate 11240Sstevel@tonic-gate switch (protocol) { 11250Sstevel@tonic-gate case IPPROTO_TCP: 11260Sstevel@tonic-gate if (!(connp->conn_flags & IPCL_EAGER)) { 11270Sstevel@tonic-gate /* 11280Sstevel@tonic-gate * for a eager connection, i.e connections which 11290Sstevel@tonic-gate * have just been created, the initialization is 11300Sstevel@tonic-gate * already done in ip at conn_creation time, so 11310Sstevel@tonic-gate * we can skip the checks here. 
11320Sstevel@tonic-gate */ 11330Sstevel@tonic-gate IPCL_CONN_INIT(connp, protocol, src, rem, ports); 11340Sstevel@tonic-gate } 11350Sstevel@tonic-gate connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(connp->conn_rem, 11360Sstevel@tonic-gate connp->conn_ports)]; 11370Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 11380Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 11390Sstevel@tonic-gate tconnp = tconnp->conn_next) { 11400Sstevel@tonic-gate if (IPCL_CONN_MATCH(tconnp, connp->conn_ulp, 11410Sstevel@tonic-gate connp->conn_rem, connp->conn_src, 11420Sstevel@tonic-gate connp->conn_ports)) { 11430Sstevel@tonic-gate 11440Sstevel@tonic-gate /* Already have a conn. bail out */ 11450Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 11460Sstevel@tonic-gate return (EADDRINUSE); 11470Sstevel@tonic-gate } 11480Sstevel@tonic-gate } 11490Sstevel@tonic-gate if (connp->conn_fanout != NULL) { 11500Sstevel@tonic-gate /* 11510Sstevel@tonic-gate * Probably a XTI/TLI application trying to do a 11520Sstevel@tonic-gate * rebind. Let it happen. 11530Sstevel@tonic-gate */ 11540Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 11550Sstevel@tonic-gate IPCL_HASH_REMOVE(connp); 11560Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 11570Sstevel@tonic-gate } 11580Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 11590Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 11600Sstevel@tonic-gate break; 11610Sstevel@tonic-gate 11620Sstevel@tonic-gate case IPPROTO_SCTP: 1163409Skcpoon /* 1164409Skcpoon * The raw socket may have already been bound, remove it 1165409Skcpoon * from the hash first. 1166409Skcpoon */ 1167409Skcpoon IPCL_HASH_REMOVE(connp); 1168409Skcpoon lport = htons((uint16_t)(ntohl(ports) & 0xFFFF)); 11690Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 11700Sstevel@tonic-gate break; 11710Sstevel@tonic-gate 1172*1676Sjpk default: 1173*1676Sjpk /* 1174*1676Sjpk * Check for conflicts among MAC exempt bindings. 
For 1175*1676Sjpk * transports with port numbers, this is done by the upper 1176*1676Sjpk * level per-transport binding logic. For all others, it's 1177*1676Sjpk * done here. 1178*1676Sjpk */ 1179*1676Sjpk if (is_system_labeled() && check_exempt_conflict_v4(connp)) 1180*1676Sjpk return (EADDRINUSE); 1181*1676Sjpk /* FALLTHROUGH */ 1182*1676Sjpk 11830Sstevel@tonic-gate case IPPROTO_UDP: 11840Sstevel@tonic-gate up = (uint16_t *)&ports; 11850Sstevel@tonic-gate IPCL_CONN_INIT(connp, protocol, src, rem, ports); 11860Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 11870Sstevel@tonic-gate connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(up[1])]; 11880Sstevel@tonic-gate } else { 11890Sstevel@tonic-gate connfp = &ipcl_proto_fanout[protocol]; 11900Sstevel@tonic-gate } 11910Sstevel@tonic-gate 11920Sstevel@tonic-gate if (connp->conn_rem != INADDR_ANY) { 11930Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 11940Sstevel@tonic-gate } else if (connp->conn_src != INADDR_ANY) { 11950Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 11960Sstevel@tonic-gate } else { 11970Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 11980Sstevel@tonic-gate } 11990Sstevel@tonic-gate break; 12000Sstevel@tonic-gate } 12010Sstevel@tonic-gate 12020Sstevel@tonic-gate return (ret); 12030Sstevel@tonic-gate } 12040Sstevel@tonic-gate 12050Sstevel@tonic-gate int 12060Sstevel@tonic-gate ipcl_conn_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, 12070Sstevel@tonic-gate const in6_addr_t *rem, uint32_t ports, uint_t ifindex) 12080Sstevel@tonic-gate { 12090Sstevel@tonic-gate connf_t *connfp; 12100Sstevel@tonic-gate uint16_t *up; 12110Sstevel@tonic-gate conn_t *tconnp; 12120Sstevel@tonic-gate in_port_t lport; 12130Sstevel@tonic-gate int ret = 0; 12140Sstevel@tonic-gate 12150Sstevel@tonic-gate switch (protocol) { 12160Sstevel@tonic-gate case IPPROTO_TCP: 12170Sstevel@tonic-gate /* Just need to insert a conn struct */ 12180Sstevel@tonic-gate if (!(connp->conn_flags & 
IPCL_EAGER)) { 12190Sstevel@tonic-gate IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports); 12200Sstevel@tonic-gate } 12210Sstevel@tonic-gate connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(connp->conn_remv6, 12220Sstevel@tonic-gate connp->conn_ports)]; 12230Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 12240Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 12250Sstevel@tonic-gate tconnp = tconnp->conn_next) { 12260Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_ulp, 12270Sstevel@tonic-gate connp->conn_remv6, connp->conn_srcv6, 12280Sstevel@tonic-gate connp->conn_ports) && 12290Sstevel@tonic-gate (tconnp->conn_tcp->tcp_bound_if == 0 || 12300Sstevel@tonic-gate tconnp->conn_tcp->tcp_bound_if == ifindex)) { 12310Sstevel@tonic-gate /* Already have a conn. bail out */ 12320Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 12330Sstevel@tonic-gate return (EADDRINUSE); 12340Sstevel@tonic-gate } 12350Sstevel@tonic-gate } 12360Sstevel@tonic-gate if (connp->conn_fanout != NULL) { 12370Sstevel@tonic-gate /* 12380Sstevel@tonic-gate * Probably a XTI/TLI application trying to do a 12390Sstevel@tonic-gate * rebind. Let it happen. 
12400Sstevel@tonic-gate */ 12410Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 12420Sstevel@tonic-gate IPCL_HASH_REMOVE(connp); 12430Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 12440Sstevel@tonic-gate } 12450Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 12460Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 12470Sstevel@tonic-gate break; 12480Sstevel@tonic-gate 12490Sstevel@tonic-gate case IPPROTO_SCTP: 1250409Skcpoon IPCL_HASH_REMOVE(connp); 1251409Skcpoon lport = htons((uint16_t)(ntohl(ports) & 0xFFFF)); 12520Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 12530Sstevel@tonic-gate break; 12540Sstevel@tonic-gate 1255*1676Sjpk default: 1256*1676Sjpk if (is_system_labeled() && check_exempt_conflict_v6(connp)) 1257*1676Sjpk return (EADDRINUSE); 1258*1676Sjpk /* FALLTHROUGH */ 12590Sstevel@tonic-gate case IPPROTO_UDP: 12600Sstevel@tonic-gate up = (uint16_t *)&ports; 12610Sstevel@tonic-gate IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports); 12620Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 12630Sstevel@tonic-gate connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(up[1])]; 12640Sstevel@tonic-gate } else { 12650Sstevel@tonic-gate connfp = &ipcl_proto_fanout_v6[protocol]; 12660Sstevel@tonic-gate } 12670Sstevel@tonic-gate 12680Sstevel@tonic-gate if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) { 12690Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 12700Sstevel@tonic-gate } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 12710Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 12720Sstevel@tonic-gate } else { 12730Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 12740Sstevel@tonic-gate } 12750Sstevel@tonic-gate break; 12760Sstevel@tonic-gate } 12770Sstevel@tonic-gate 12780Sstevel@tonic-gate return (ret); 12790Sstevel@tonic-gate } 12800Sstevel@tonic-gate 12810Sstevel@tonic-gate /* 12820Sstevel@tonic-gate * v4 packet classifying function. 
looks up the fanout table to 12830Sstevel@tonic-gate * find the conn, the packet belongs to. returns the conn with 12840Sstevel@tonic-gate * the reference held, null otherwise. 1285*1676Sjpk * 1286*1676Sjpk * If zoneid is ALL_ZONES, then the search rules described in the "Connection 1287*1676Sjpk * Lookup" comment block are applied. Labels are also checked as described 1288*1676Sjpk * above. If the packet is from the inside (looped back), and is from the same 1289*1676Sjpk * zone, then label checks are omitted. 12900Sstevel@tonic-gate */ 12910Sstevel@tonic-gate conn_t * 12920Sstevel@tonic-gate ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid) 12930Sstevel@tonic-gate { 12940Sstevel@tonic-gate ipha_t *ipha; 12950Sstevel@tonic-gate connf_t *connfp, *bind_connfp; 12960Sstevel@tonic-gate uint16_t lport; 12970Sstevel@tonic-gate uint16_t fport; 12980Sstevel@tonic-gate uint32_t ports; 12990Sstevel@tonic-gate conn_t *connp; 13000Sstevel@tonic-gate uint16_t *up; 1301*1676Sjpk boolean_t shared_addr; 1302*1676Sjpk boolean_t unlabeled; 13030Sstevel@tonic-gate 13040Sstevel@tonic-gate ipha = (ipha_t *)mp->b_rptr; 13050Sstevel@tonic-gate up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET); 13060Sstevel@tonic-gate 13070Sstevel@tonic-gate switch (protocol) { 13080Sstevel@tonic-gate case IPPROTO_TCP: 13090Sstevel@tonic-gate ports = *(uint32_t *)up; 13100Sstevel@tonic-gate connfp = 13110Sstevel@tonic-gate &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src, ports)]; 13120Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 13130Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 13140Sstevel@tonic-gate connp = connp->conn_next) { 13150Sstevel@tonic-gate if (IPCL_CONN_MATCH(connp, protocol, 13160Sstevel@tonic-gate ipha->ipha_src, ipha->ipha_dst, ports)) 13170Sstevel@tonic-gate break; 13180Sstevel@tonic-gate } 13190Sstevel@tonic-gate 13200Sstevel@tonic-gate if (connp != NULL) { 1321*1676Sjpk /* 1322*1676Sjpk * We have a fully-bound 
TCP connection. 1323*1676Sjpk * 1324*1676Sjpk * For labeled systems, there's no need to check the 1325*1676Sjpk * label here. It's known to be good as we checked 1326*1676Sjpk * before allowing the connection to become bound. 1327*1676Sjpk */ 13280Sstevel@tonic-gate CONN_INC_REF(connp); 13290Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 13300Sstevel@tonic-gate return (connp); 13310Sstevel@tonic-gate } 13320Sstevel@tonic-gate 13330Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 13340Sstevel@tonic-gate 13350Sstevel@tonic-gate lport = up[1]; 1336*1676Sjpk unlabeled = B_FALSE; 1337*1676Sjpk /* Cred cannot be null on IPv4 */ 1338*1676Sjpk if (is_system_labeled()) 1339*1676Sjpk unlabeled = (crgetlabel(DB_CRED(mp))->tsl_flags & 1340*1676Sjpk TSLF_UNLABELED) != 0; 1341*1676Sjpk shared_addr = (zoneid == ALL_ZONES); 1342*1676Sjpk if (shared_addr) { 1343*1676Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 1344*1676Sjpk /* 1345*1676Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 1346*1676Sjpk * ALL_ZONES. In that case, we assume it's SLP, and 1347*1676Sjpk * search for the zone based on the packet label. 1348*1676Sjpk * 1349*1676Sjpk * If there is such a zone, we prefer to find a 1350*1676Sjpk * connection in it. Otherwise, we look for a 1351*1676Sjpk * MAC-exempt connection in any zone whose label 1352*1676Sjpk * dominates the default label on the packet. 
1353*1676Sjpk */ 1354*1676Sjpk if (zoneid == ALL_ZONES) 1355*1676Sjpk zoneid = tsol_packet_to_zoneid(mp); 1356*1676Sjpk else 1357*1676Sjpk unlabeled = B_FALSE; 1358*1676Sjpk } 1359*1676Sjpk 13600Sstevel@tonic-gate bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 13610Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 13620Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 13630Sstevel@tonic-gate connp = connp->conn_next) { 1364*1676Sjpk if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst, 1365*1676Sjpk lport) && 1366*1676Sjpk (connp->conn_zoneid == zoneid || 1367*1676Sjpk (unlabeled && connp->conn_mac_exempt))) 13680Sstevel@tonic-gate break; 13690Sstevel@tonic-gate } 13700Sstevel@tonic-gate 1371*1676Sjpk /* 1372*1676Sjpk * If the matching connection is SLP on a private address, then 1373*1676Sjpk * the label on the packet must match the local zone's label. 1374*1676Sjpk * Otherwise, it must be in the label range defined by tnrh. 1375*1676Sjpk * This is ensured by tsol_receive_label. 
1376*1676Sjpk */ 1377*1676Sjpk if (connp != NULL && is_system_labeled() && 1378*1676Sjpk !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION, 1379*1676Sjpk shared_addr, connp)) { 1380*1676Sjpk DTRACE_PROBE3( 1381*1676Sjpk tx__ip__log__info__classify__tcp, 1382*1676Sjpk char *, 1383*1676Sjpk "connp(1) could not receive mp(2)", 1384*1676Sjpk conn_t *, connp, mblk_t *, mp); 1385*1676Sjpk connp = NULL; 1386*1676Sjpk } 1387*1676Sjpk 13880Sstevel@tonic-gate if (connp != NULL) { 1389*1676Sjpk /* Have a listener at least */ 13900Sstevel@tonic-gate CONN_INC_REF(connp); 13910Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 13920Sstevel@tonic-gate return (connp); 13930Sstevel@tonic-gate } 13940Sstevel@tonic-gate 13950Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 13960Sstevel@tonic-gate 13970Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 13980Sstevel@tonic-gate ("ipcl_classify: couldn't classify mp = %p\n", 13990Sstevel@tonic-gate (void *)mp)); 14000Sstevel@tonic-gate break; 14010Sstevel@tonic-gate 14020Sstevel@tonic-gate case IPPROTO_UDP: 14030Sstevel@tonic-gate lport = up[1]; 1404*1676Sjpk unlabeled = B_FALSE; 1405*1676Sjpk /* Cred cannot be null on IPv4 */ 1406*1676Sjpk if (is_system_labeled()) 1407*1676Sjpk unlabeled = (crgetlabel(DB_CRED(mp))->tsl_flags & 1408*1676Sjpk TSLF_UNLABELED) != 0; 1409*1676Sjpk shared_addr = (zoneid == ALL_ZONES); 1410*1676Sjpk if (shared_addr) { 1411*1676Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 1412*1676Sjpk /* 1413*1676Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 1414*1676Sjpk * ALL_ZONES. In that case, we assume it's SLP, and 1415*1676Sjpk * search for the zone based on the packet label. 1416*1676Sjpk * 1417*1676Sjpk * If there is such a zone, we prefer to find a 1418*1676Sjpk * connection in it. Otherwise, we look for a 1419*1676Sjpk * MAC-exempt connection in any zone whose label 1420*1676Sjpk * dominates the default label on the packet. 
1421*1676Sjpk */ 1422*1676Sjpk if (zoneid == ALL_ZONES) 1423*1676Sjpk zoneid = tsol_packet_to_zoneid(mp); 1424*1676Sjpk else 1425*1676Sjpk unlabeled = B_FALSE; 1426*1676Sjpk } 14270Sstevel@tonic-gate fport = up[0]; 14280Sstevel@tonic-gate IPCL_DEBUG_LVL(512, ("ipcl_udp_classify %x %x", lport, fport)); 14290Sstevel@tonic-gate connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; 14300Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 14310Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 14320Sstevel@tonic-gate connp = connp->conn_next) { 14330Sstevel@tonic-gate if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst, 14340Sstevel@tonic-gate fport, ipha->ipha_src) && 1435*1676Sjpk (connp->conn_zoneid == zoneid || 1436*1676Sjpk (unlabeled && connp->conn_mac_exempt))) 14370Sstevel@tonic-gate break; 14380Sstevel@tonic-gate } 14390Sstevel@tonic-gate 1440*1676Sjpk if (connp != NULL && is_system_labeled() && 1441*1676Sjpk !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION, 1442*1676Sjpk shared_addr, connp)) { 1443*1676Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__udp, 1444*1676Sjpk char *, "connp(1) could not receive mp(2)", 1445*1676Sjpk conn_t *, connp, mblk_t *, mp); 1446*1676Sjpk connp = NULL; 1447*1676Sjpk } 1448*1676Sjpk 14490Sstevel@tonic-gate if (connp != NULL) { 14500Sstevel@tonic-gate CONN_INC_REF(connp); 14510Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 14520Sstevel@tonic-gate return (connp); 14530Sstevel@tonic-gate } 14540Sstevel@tonic-gate 14550Sstevel@tonic-gate /* 14560Sstevel@tonic-gate * We shouldn't come here for multicast/broadcast packets 14570Sstevel@tonic-gate */ 14580Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 14590Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 14600Sstevel@tonic-gate ("ipcl_classify: cant find udp conn_t for ports : %x %x", 14610Sstevel@tonic-gate lport, fport)); 14620Sstevel@tonic-gate break; 14630Sstevel@tonic-gate } 14640Sstevel@tonic-gate 14650Sstevel@tonic-gate return (NULL); 14660Sstevel@tonic-gate } 
14670Sstevel@tonic-gate 14680Sstevel@tonic-gate conn_t * 14690Sstevel@tonic-gate ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid) 14700Sstevel@tonic-gate { 14710Sstevel@tonic-gate ip6_t *ip6h; 14720Sstevel@tonic-gate connf_t *connfp, *bind_connfp; 14730Sstevel@tonic-gate uint16_t lport; 14740Sstevel@tonic-gate uint16_t fport; 14750Sstevel@tonic-gate tcph_t *tcph; 14760Sstevel@tonic-gate uint32_t ports; 14770Sstevel@tonic-gate conn_t *connp; 14780Sstevel@tonic-gate uint16_t *up; 1479*1676Sjpk boolean_t shared_addr; 1480*1676Sjpk boolean_t unlabeled; 14810Sstevel@tonic-gate 14820Sstevel@tonic-gate ip6h = (ip6_t *)mp->b_rptr; 14830Sstevel@tonic-gate 14840Sstevel@tonic-gate switch (protocol) { 14850Sstevel@tonic-gate case IPPROTO_TCP: 14860Sstevel@tonic-gate tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 14870Sstevel@tonic-gate up = (uint16_t *)tcph->th_lport; 14880Sstevel@tonic-gate ports = *(uint32_t *)up; 14890Sstevel@tonic-gate 14900Sstevel@tonic-gate connfp = 14910Sstevel@tonic-gate &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src, ports)]; 14920Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 14930Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 14940Sstevel@tonic-gate connp = connp->conn_next) { 14950Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(connp, protocol, 14960Sstevel@tonic-gate ip6h->ip6_src, ip6h->ip6_dst, ports)) 14970Sstevel@tonic-gate break; 14980Sstevel@tonic-gate } 14990Sstevel@tonic-gate 15000Sstevel@tonic-gate if (connp != NULL) { 1501*1676Sjpk /* 1502*1676Sjpk * We have a fully-bound TCP connection. 1503*1676Sjpk * 1504*1676Sjpk * For labeled systems, there's no need to check the 1505*1676Sjpk * label here. It's known to be good as we checked 1506*1676Sjpk * before allowing the connection to become bound. 
1507*1676Sjpk */ 15080Sstevel@tonic-gate CONN_INC_REF(connp); 15090Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 15100Sstevel@tonic-gate return (connp); 15110Sstevel@tonic-gate } 15120Sstevel@tonic-gate 15130Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 15140Sstevel@tonic-gate 15150Sstevel@tonic-gate lport = up[1]; 1516*1676Sjpk unlabeled = B_FALSE; 1517*1676Sjpk /* Cred can be null on IPv6 */ 1518*1676Sjpk if (is_system_labeled()) { 1519*1676Sjpk cred_t *cr = DB_CRED(mp); 1520*1676Sjpk 1521*1676Sjpk unlabeled = (cr != NULL && 1522*1676Sjpk crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0; 1523*1676Sjpk } 1524*1676Sjpk shared_addr = (zoneid == ALL_ZONES); 1525*1676Sjpk if (shared_addr) { 1526*1676Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 1527*1676Sjpk /* 1528*1676Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 1529*1676Sjpk * ALL_ZONES. In that case, we assume it's SLP, and 1530*1676Sjpk * search for the zone based on the packet label. 1531*1676Sjpk * 1532*1676Sjpk * If there is such a zone, we prefer to find a 1533*1676Sjpk * connection in it. Otherwise, we look for a 1534*1676Sjpk * MAC-exempt connection in any zone whose label 1535*1676Sjpk * dominates the default label on the packet. 
1536*1676Sjpk */ 1537*1676Sjpk if (zoneid == ALL_ZONES) 1538*1676Sjpk zoneid = tsol_packet_to_zoneid(mp); 1539*1676Sjpk else 1540*1676Sjpk unlabeled = B_FALSE; 1541*1676Sjpk } 1542*1676Sjpk 15430Sstevel@tonic-gate bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 15440Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 15450Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 15460Sstevel@tonic-gate connp = connp->conn_next) { 15470Sstevel@tonic-gate if (IPCL_BIND_MATCH_V6(connp, protocol, 15480Sstevel@tonic-gate ip6h->ip6_dst, lport) && 1549*1676Sjpk (connp->conn_zoneid == zoneid || 1550*1676Sjpk (unlabeled && connp->conn_mac_exempt))) 15510Sstevel@tonic-gate break; 15520Sstevel@tonic-gate } 15530Sstevel@tonic-gate 1554*1676Sjpk if (connp != NULL && is_system_labeled() && 1555*1676Sjpk !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION, 1556*1676Sjpk shared_addr, connp)) { 1557*1676Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__tcp6, 1558*1676Sjpk char *, "connp(1) could not receive mp(2)", 1559*1676Sjpk conn_t *, connp, mblk_t *, mp); 1560*1676Sjpk connp = NULL; 1561*1676Sjpk } 1562*1676Sjpk 15630Sstevel@tonic-gate if (connp != NULL) { 15640Sstevel@tonic-gate /* Have a listner at least */ 15650Sstevel@tonic-gate CONN_INC_REF(connp); 15660Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 15670Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 15680Sstevel@tonic-gate ("ipcl_classify_v6: found listner " 15690Sstevel@tonic-gate "connp = %p\n", (void *)connp)); 15700Sstevel@tonic-gate 15710Sstevel@tonic-gate return (connp); 15720Sstevel@tonic-gate } 15730Sstevel@tonic-gate 15740Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 15750Sstevel@tonic-gate 15760Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 15770Sstevel@tonic-gate ("ipcl_classify_v6: couldn't classify mp = %p\n", 15780Sstevel@tonic-gate (void *)mp)); 15790Sstevel@tonic-gate break; 15800Sstevel@tonic-gate 15810Sstevel@tonic-gate case IPPROTO_UDP: 15820Sstevel@tonic-gate up = 
(uint16_t *)&mp->b_rptr[hdr_len]; 15830Sstevel@tonic-gate lport = up[1]; 1584*1676Sjpk unlabeled = B_FALSE; 1585*1676Sjpk /* Cred can be null on IPv6 */ 1586*1676Sjpk if (is_system_labeled()) { 1587*1676Sjpk cred_t *cr = DB_CRED(mp); 1588*1676Sjpk 1589*1676Sjpk unlabeled = (cr != NULL && 1590*1676Sjpk crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0; 1591*1676Sjpk } 1592*1676Sjpk shared_addr = (zoneid == ALL_ZONES); 1593*1676Sjpk if (shared_addr) { 1594*1676Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 1595*1676Sjpk /* 1596*1676Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 1597*1676Sjpk * ALL_ZONES. In that case, we assume it's SLP, and 1598*1676Sjpk * search for the zone based on the packet label. 1599*1676Sjpk * 1600*1676Sjpk * If there is such a zone, we prefer to find a 1601*1676Sjpk * connection in it. Otherwise, we look for a 1602*1676Sjpk * MAC-exempt connection in any zone whose label 1603*1676Sjpk * dominates the default label on the packet. 1604*1676Sjpk */ 1605*1676Sjpk if (zoneid == ALL_ZONES) 1606*1676Sjpk zoneid = tsol_packet_to_zoneid(mp); 1607*1676Sjpk else 1608*1676Sjpk unlabeled = B_FALSE; 1609*1676Sjpk } 1610*1676Sjpk 16110Sstevel@tonic-gate fport = up[0]; 16120Sstevel@tonic-gate IPCL_DEBUG_LVL(512, ("ipcl_udp_classify_v6 %x %x", lport, 16130Sstevel@tonic-gate fport)); 16140Sstevel@tonic-gate connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; 16150Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 16160Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 16170Sstevel@tonic-gate connp = connp->conn_next) { 16180Sstevel@tonic-gate if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst, 16190Sstevel@tonic-gate fport, ip6h->ip6_src) && 1620*1676Sjpk (connp->conn_zoneid == zoneid || 1621*1676Sjpk (unlabeled && connp->conn_mac_exempt))) 16220Sstevel@tonic-gate break; 16230Sstevel@tonic-gate } 16240Sstevel@tonic-gate 1625*1676Sjpk if (connp != NULL && is_system_labeled() && 1626*1676Sjpk !tsol_receive_local(mp, &ip6h->ip6_dst, 
IPV6_VERSION, 1627*1676Sjpk shared_addr, connp)) { 1628*1676Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__udp6, 1629*1676Sjpk char *, "connp(1) could not receive mp(2)", 1630*1676Sjpk conn_t *, connp, mblk_t *, mp); 1631*1676Sjpk connp = NULL; 1632*1676Sjpk } 1633*1676Sjpk 16340Sstevel@tonic-gate if (connp != NULL) { 16350Sstevel@tonic-gate CONN_INC_REF(connp); 16360Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 16370Sstevel@tonic-gate return (connp); 16380Sstevel@tonic-gate } 16390Sstevel@tonic-gate 16400Sstevel@tonic-gate /* 16410Sstevel@tonic-gate * We shouldn't come here for multicast/broadcast packets 16420Sstevel@tonic-gate */ 16430Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 16440Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 16450Sstevel@tonic-gate ("ipcl_classify_v6: cant find udp conn_t for ports : %x %x", 16460Sstevel@tonic-gate lport, fport)); 16470Sstevel@tonic-gate break; 16480Sstevel@tonic-gate } 16490Sstevel@tonic-gate 16500Sstevel@tonic-gate return (NULL); 16510Sstevel@tonic-gate } 16520Sstevel@tonic-gate 16530Sstevel@tonic-gate /* 16540Sstevel@tonic-gate * wrapper around ipcl_classify_(v4,v6) routines. 
16550Sstevel@tonic-gate */ 16560Sstevel@tonic-gate conn_t * 16570Sstevel@tonic-gate ipcl_classify(mblk_t *mp, zoneid_t zoneid) 16580Sstevel@tonic-gate { 16590Sstevel@tonic-gate uint16_t hdr_len; 16600Sstevel@tonic-gate ipha_t *ipha; 16610Sstevel@tonic-gate uint8_t *nexthdrp; 16620Sstevel@tonic-gate 16630Sstevel@tonic-gate if (MBLKL(mp) < sizeof (ipha_t)) 16640Sstevel@tonic-gate return (NULL); 16650Sstevel@tonic-gate 16660Sstevel@tonic-gate switch (IPH_HDR_VERSION(mp->b_rptr)) { 16670Sstevel@tonic-gate case IPV4_VERSION: 16680Sstevel@tonic-gate ipha = (ipha_t *)mp->b_rptr; 16690Sstevel@tonic-gate hdr_len = IPH_HDR_LENGTH(ipha); 16700Sstevel@tonic-gate return (ipcl_classify_v4(mp, ipha->ipha_protocol, hdr_len, 16710Sstevel@tonic-gate zoneid)); 16720Sstevel@tonic-gate case IPV6_VERSION: 16730Sstevel@tonic-gate if (!ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr, 16740Sstevel@tonic-gate &hdr_len, &nexthdrp)) 16750Sstevel@tonic-gate return (NULL); 16760Sstevel@tonic-gate 16770Sstevel@tonic-gate return (ipcl_classify_v6(mp, *nexthdrp, hdr_len, zoneid)); 16780Sstevel@tonic-gate } 16790Sstevel@tonic-gate 16800Sstevel@tonic-gate return (NULL); 16810Sstevel@tonic-gate } 16820Sstevel@tonic-gate 16830Sstevel@tonic-gate conn_t * 1684*1676Sjpk ipcl_classify_raw(mblk_t *mp, uint8_t protocol, zoneid_t zoneid, 1685*1676Sjpk uint32_t ports, ipha_t *hdr) 16860Sstevel@tonic-gate { 1687*1676Sjpk connf_t *connfp; 16880Sstevel@tonic-gate conn_t *connp; 16890Sstevel@tonic-gate in_port_t lport; 16900Sstevel@tonic-gate int af; 1691*1676Sjpk boolean_t shared_addr; 1692*1676Sjpk boolean_t unlabeled; 1693*1676Sjpk const void *dst; 16940Sstevel@tonic-gate 16950Sstevel@tonic-gate lport = ((uint16_t *)&ports)[1]; 1696*1676Sjpk 1697*1676Sjpk unlabeled = B_FALSE; 1698*1676Sjpk /* Cred can be null on IPv6 */ 1699*1676Sjpk if (is_system_labeled()) { 1700*1676Sjpk cred_t *cr = DB_CRED(mp); 1701*1676Sjpk 1702*1676Sjpk unlabeled = (cr != NULL && 1703*1676Sjpk crgetlabel(cr)->tsl_flags & 
TSLF_UNLABELED) != 0; 1704*1676Sjpk } 1705*1676Sjpk shared_addr = (zoneid == ALL_ZONES); 1706*1676Sjpk if (shared_addr) { 1707*1676Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 1708*1676Sjpk /* 1709*1676Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 1710*1676Sjpk * ALL_ZONES. In that case, we assume it's SLP, and search for 1711*1676Sjpk * the zone based on the packet label. 1712*1676Sjpk * 1713*1676Sjpk * If there is such a zone, we prefer to find a connection in 1714*1676Sjpk * it. Otherwise, we look for a MAC-exempt connection in any 1715*1676Sjpk * zone whose label dominates the default label on the packet. 1716*1676Sjpk */ 1717*1676Sjpk if (zoneid == ALL_ZONES) 1718*1676Sjpk zoneid = tsol_packet_to_zoneid(mp); 1719*1676Sjpk else 1720*1676Sjpk unlabeled = B_FALSE; 1721*1676Sjpk } 1722*1676Sjpk 17230Sstevel@tonic-gate af = IPH_HDR_VERSION(hdr); 1724*1676Sjpk dst = af == IPV4_VERSION ? (const void *)&hdr->ipha_dst : 1725*1676Sjpk (const void *)&((ip6_t *)hdr)->ip6_dst; 17260Sstevel@tonic-gate connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport))]; 17270Sstevel@tonic-gate 17280Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 17290Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 17300Sstevel@tonic-gate connp = connp->conn_next) { 17310Sstevel@tonic-gate /* We don't allow v4 fallback for v6 raw socket. */ 1732*1676Sjpk if (af == (connp->conn_af_isv6 ? 
IPV4_VERSION : 1733*1676Sjpk IPV6_VERSION)) 17340Sstevel@tonic-gate continue; 17350Sstevel@tonic-gate if (connp->conn_fully_bound) { 17360Sstevel@tonic-gate if (af == IPV4_VERSION) { 1737*1676Sjpk if (!IPCL_CONN_MATCH(connp, protocol, 1738*1676Sjpk hdr->ipha_src, hdr->ipha_dst, ports)) 1739*1676Sjpk continue; 17400Sstevel@tonic-gate } else { 1741*1676Sjpk if (!IPCL_CONN_MATCH_V6(connp, protocol, 17420Sstevel@tonic-gate ((ip6_t *)hdr)->ip6_src, 1743*1676Sjpk ((ip6_t *)hdr)->ip6_dst, ports)) 1744*1676Sjpk continue; 17450Sstevel@tonic-gate } 17460Sstevel@tonic-gate } else { 17470Sstevel@tonic-gate if (af == IPV4_VERSION) { 1748*1676Sjpk if (!IPCL_BIND_MATCH(connp, protocol, 1749*1676Sjpk hdr->ipha_dst, lport)) 1750*1676Sjpk continue; 17510Sstevel@tonic-gate } else { 1752*1676Sjpk if (!IPCL_BIND_MATCH_V6(connp, protocol, 1753*1676Sjpk ((ip6_t *)hdr)->ip6_dst, lport)) 1754*1676Sjpk continue; 17550Sstevel@tonic-gate } 17560Sstevel@tonic-gate } 1757*1676Sjpk 1758*1676Sjpk if (connp->conn_zoneid == zoneid || 1759*1676Sjpk (unlabeled && connp->conn_mac_exempt)) 1760*1676Sjpk break; 1761*1676Sjpk } 1762*1676Sjpk /* 1763*1676Sjpk * If the connection is fully-bound and connection-oriented (TCP or 1764*1676Sjpk * SCTP), then we've already validated the remote system's label. 1765*1676Sjpk * There's no need to do it again for every packet. 
1766*1676Sjpk */ 1767*1676Sjpk if (connp != NULL && is_system_labeled() && (!connp->conn_fully_bound || 1768*1676Sjpk !(connp->conn_flags & (IPCL_TCP|IPCL_SCTPCONN))) && 1769*1676Sjpk !tsol_receive_local(mp, dst, af, shared_addr, connp)) { 1770*1676Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__rawip, 1771*1676Sjpk char *, "connp(1) could not receive mp(2)", 1772*1676Sjpk conn_t *, connp, mblk_t *, mp); 1773*1676Sjpk connp = NULL; 17740Sstevel@tonic-gate } 1775409Skcpoon 1776409Skcpoon if (connp != NULL) 1777409Skcpoon goto found; 1778409Skcpoon mutex_exit(&connfp->connf_lock); 1779409Skcpoon 1780409Skcpoon /* Try to look for a wildcard match. */ 1781409Skcpoon connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(0)]; 1782409Skcpoon mutex_enter(&connfp->connf_lock); 1783409Skcpoon for (connp = connfp->connf_head; connp != NULL; 1784409Skcpoon connp = connp->conn_next) { 1785409Skcpoon /* We don't allow v4 fallback for v6 raw socket. */ 1786409Skcpoon if ((af == (connp->conn_af_isv6 ? IPV4_VERSION : 1787409Skcpoon IPV6_VERSION)) || (connp->conn_zoneid != zoneid)) { 1788409Skcpoon continue; 1789409Skcpoon } 1790409Skcpoon if (af == IPV4_VERSION) { 1791409Skcpoon if (IPCL_RAW_MATCH(connp, protocol, hdr->ipha_dst)) 1792409Skcpoon break; 1793409Skcpoon } else { 1794409Skcpoon if (IPCL_RAW_MATCH_V6(connp, protocol, 1795409Skcpoon ((ip6_t *)hdr)->ip6_dst)) { 1796409Skcpoon break; 1797409Skcpoon } 1798409Skcpoon } 17990Sstevel@tonic-gate } 1800409Skcpoon 1801409Skcpoon if (connp != NULL) 1802409Skcpoon goto found; 1803409Skcpoon 18040Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 18050Sstevel@tonic-gate return (NULL); 1806409Skcpoon 1807409Skcpoon found: 1808409Skcpoon ASSERT(connp != NULL); 1809409Skcpoon CONN_INC_REF(connp); 1810409Skcpoon mutex_exit(&connfp->connf_lock); 1811409Skcpoon return (connp); 18120Sstevel@tonic-gate } 18130Sstevel@tonic-gate 18140Sstevel@tonic-gate /* ARGSUSED */ 18150Sstevel@tonic-gate static int 18160Sstevel@tonic-gate ipcl_tcpconn_constructor(void 
*buf, void *cdrarg, int kmflags) 18170Sstevel@tonic-gate { 18180Sstevel@tonic-gate itc_t *itc = (itc_t *)buf; 18190Sstevel@tonic-gate conn_t *connp = &itc->itc_conn; 18200Sstevel@tonic-gate tcp_t *tcp = &itc->itc_tcp; 18210Sstevel@tonic-gate bzero(itc, sizeof (itc_t)); 18220Sstevel@tonic-gate tcp->tcp_timercache = tcp_timermp_alloc(KM_NOSLEEP); 18230Sstevel@tonic-gate connp->conn_tcp = tcp; 18240Sstevel@tonic-gate connp->conn_flags = IPCL_TCPCONN; 18250Sstevel@tonic-gate connp->conn_ulp = IPPROTO_TCP; 18260Sstevel@tonic-gate tcp->tcp_connp = connp; 18270Sstevel@tonic-gate return (0); 18280Sstevel@tonic-gate } 18290Sstevel@tonic-gate 18300Sstevel@tonic-gate /* ARGSUSED */ 18310Sstevel@tonic-gate static void 18320Sstevel@tonic-gate ipcl_tcpconn_destructor(void *buf, void *cdrarg) 18330Sstevel@tonic-gate { 18340Sstevel@tonic-gate tcp_timermp_free(((conn_t *)buf)->conn_tcp); 18350Sstevel@tonic-gate } 18360Sstevel@tonic-gate 18370Sstevel@tonic-gate /* 18380Sstevel@tonic-gate * All conns are inserted in a global multi-list for the benefit of 18390Sstevel@tonic-gate * walkers. The walk is guaranteed to walk all open conns at the time 18400Sstevel@tonic-gate * of the start of the walk exactly once. This property is needed to 18410Sstevel@tonic-gate * achieve some cleanups during unplumb of interfaces. This is achieved 18420Sstevel@tonic-gate * as follows. 18430Sstevel@tonic-gate * 18440Sstevel@tonic-gate * ipcl_conn_create and ipcl_conn_destroy are the only functions that 18450Sstevel@tonic-gate * call the insert and delete functions below at creation and deletion 18460Sstevel@tonic-gate * time respectively. The conn never moves or changes its position in this 18470Sstevel@tonic-gate * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt 18480Sstevel@tonic-gate * won't increase due to walkers, once the conn deletion has started. 
Note 18490Sstevel@tonic-gate * that we can't remove the conn from the global list and then wait for 18500Sstevel@tonic-gate * the refcnt to drop to zero, since walkers would then see a truncated 18510Sstevel@tonic-gate * list. CONN_INCIPIENT ensures that walkers don't start looking at 18520Sstevel@tonic-gate * conns until ip_open is ready to make them globally visible. 18530Sstevel@tonic-gate * The global round robin multi-list locks are held only to get the 18540Sstevel@tonic-gate * next member/insertion/deletion and contention should be negligible 18550Sstevel@tonic-gate * if the multi-list is much greater than the number of cpus. 18560Sstevel@tonic-gate */ 18570Sstevel@tonic-gate void 18580Sstevel@tonic-gate ipcl_globalhash_insert(conn_t *connp) 18590Sstevel@tonic-gate { 18600Sstevel@tonic-gate int index; 18610Sstevel@tonic-gate 18620Sstevel@tonic-gate /* 18630Sstevel@tonic-gate * No need for atomic here. Approximate even distribution 18640Sstevel@tonic-gate * in the global lists is sufficient. 18650Sstevel@tonic-gate */ 18660Sstevel@tonic-gate conn_g_index++; 18670Sstevel@tonic-gate index = conn_g_index & (CONN_G_HASH_SIZE - 1); 18680Sstevel@tonic-gate 18690Sstevel@tonic-gate connp->conn_g_prev = NULL; 18700Sstevel@tonic-gate /* 18710Sstevel@tonic-gate * Mark as INCIPIENT, so that walkers will ignore this 18720Sstevel@tonic-gate * for now, till ip_open is ready to make it visible globally. 
18730Sstevel@tonic-gate */ 18740Sstevel@tonic-gate connp->conn_state_flags |= CONN_INCIPIENT; 18750Sstevel@tonic-gate 18760Sstevel@tonic-gate /* Insert at the head of the list */ 18770Sstevel@tonic-gate mutex_enter(&ipcl_globalhash_fanout[index].connf_lock); 18780Sstevel@tonic-gate connp->conn_g_next = ipcl_globalhash_fanout[index].connf_head; 18790Sstevel@tonic-gate if (connp->conn_g_next != NULL) 18800Sstevel@tonic-gate connp->conn_g_next->conn_g_prev = connp; 18810Sstevel@tonic-gate ipcl_globalhash_fanout[index].connf_head = connp; 18820Sstevel@tonic-gate 18830Sstevel@tonic-gate /* The fanout bucket this conn points to */ 18840Sstevel@tonic-gate connp->conn_g_fanout = &ipcl_globalhash_fanout[index]; 18850Sstevel@tonic-gate 18860Sstevel@tonic-gate mutex_exit(&ipcl_globalhash_fanout[index].connf_lock); 18870Sstevel@tonic-gate } 18880Sstevel@tonic-gate 18890Sstevel@tonic-gate void 18900Sstevel@tonic-gate ipcl_globalhash_remove(conn_t *connp) 18910Sstevel@tonic-gate { 18920Sstevel@tonic-gate /* 18930Sstevel@tonic-gate * We were never inserted in the global multi list. 18940Sstevel@tonic-gate * IPCL_NONE variety is never inserted in the global multilist 18950Sstevel@tonic-gate * since it is presumed to not need any cleanup and is transient. 
18960Sstevel@tonic-gate */ 18970Sstevel@tonic-gate if (connp->conn_g_fanout == NULL) 18980Sstevel@tonic-gate return; 18990Sstevel@tonic-gate 19000Sstevel@tonic-gate mutex_enter(&connp->conn_g_fanout->connf_lock); 19010Sstevel@tonic-gate if (connp->conn_g_prev != NULL) 19020Sstevel@tonic-gate connp->conn_g_prev->conn_g_next = connp->conn_g_next; 19030Sstevel@tonic-gate else 19040Sstevel@tonic-gate connp->conn_g_fanout->connf_head = connp->conn_g_next; 19050Sstevel@tonic-gate if (connp->conn_g_next != NULL) 19060Sstevel@tonic-gate connp->conn_g_next->conn_g_prev = connp->conn_g_prev; 19070Sstevel@tonic-gate mutex_exit(&connp->conn_g_fanout->connf_lock); 19080Sstevel@tonic-gate 19090Sstevel@tonic-gate /* Better to stumble on a null pointer than to corrupt memory */ 19100Sstevel@tonic-gate connp->conn_g_next = NULL; 19110Sstevel@tonic-gate connp->conn_g_prev = NULL; 19120Sstevel@tonic-gate } 19130Sstevel@tonic-gate 19140Sstevel@tonic-gate /* 19150Sstevel@tonic-gate * Walk the list of all conn_t's in the system, calling the function provided 19160Sstevel@tonic-gate * with the specified argument for each. 19170Sstevel@tonic-gate * Applies to both IPv4 and IPv6. 19180Sstevel@tonic-gate * 19190Sstevel@tonic-gate * IPCs may hold pointers to ipif/ill. To guard against stale pointers 19200Sstevel@tonic-gate * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is 19210Sstevel@tonic-gate * unplumbed or removed. New conn_t's that are created while we are walking 19220Sstevel@tonic-gate * may be missed by this walk, because they are not necessarily inserted 19230Sstevel@tonic-gate * at the tail of the list. They are new conn_t's and thus don't have any 19240Sstevel@tonic-gate * stale pointers. The CONN_CLOSING flag ensures that no new reference 19250Sstevel@tonic-gate * is created to the struct that is going away. 
19260Sstevel@tonic-gate */ 19270Sstevel@tonic-gate void 19280Sstevel@tonic-gate ipcl_walk(pfv_t func, void *arg) 19290Sstevel@tonic-gate { 19300Sstevel@tonic-gate int i; 19310Sstevel@tonic-gate conn_t *connp; 19320Sstevel@tonic-gate conn_t *prev_connp; 19330Sstevel@tonic-gate 19340Sstevel@tonic-gate for (i = 0; i < CONN_G_HASH_SIZE; i++) { 19350Sstevel@tonic-gate mutex_enter(&ipcl_globalhash_fanout[i].connf_lock); 19360Sstevel@tonic-gate prev_connp = NULL; 19370Sstevel@tonic-gate connp = ipcl_globalhash_fanout[i].connf_head; 19380Sstevel@tonic-gate while (connp != NULL) { 19390Sstevel@tonic-gate mutex_enter(&connp->conn_lock); 19400Sstevel@tonic-gate if (connp->conn_state_flags & 19410Sstevel@tonic-gate (CONN_CONDEMNED | CONN_INCIPIENT)) { 19420Sstevel@tonic-gate mutex_exit(&connp->conn_lock); 19430Sstevel@tonic-gate connp = connp->conn_g_next; 19440Sstevel@tonic-gate continue; 19450Sstevel@tonic-gate } 19460Sstevel@tonic-gate CONN_INC_REF_LOCKED(connp); 19470Sstevel@tonic-gate mutex_exit(&connp->conn_lock); 19480Sstevel@tonic-gate mutex_exit(&ipcl_globalhash_fanout[i].connf_lock); 19490Sstevel@tonic-gate (*func)(connp, arg); 19500Sstevel@tonic-gate if (prev_connp != NULL) 19510Sstevel@tonic-gate CONN_DEC_REF(prev_connp); 19520Sstevel@tonic-gate mutex_enter(&ipcl_globalhash_fanout[i].connf_lock); 19530Sstevel@tonic-gate prev_connp = connp; 19540Sstevel@tonic-gate connp = connp->conn_g_next; 19550Sstevel@tonic-gate } 19560Sstevel@tonic-gate mutex_exit(&ipcl_globalhash_fanout[i].connf_lock); 19570Sstevel@tonic-gate if (prev_connp != NULL) 19580Sstevel@tonic-gate CONN_DEC_REF(prev_connp); 19590Sstevel@tonic-gate } 19600Sstevel@tonic-gate } 19610Sstevel@tonic-gate 19620Sstevel@tonic-gate /* 19630Sstevel@tonic-gate * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on 19640Sstevel@tonic-gate * the {src, dst, lport, fport} quadruplet. Returns with conn reference 19650Sstevel@tonic-gate * held; caller must call CONN_DEC_REF. 
Only checks for connected entries 19660Sstevel@tonic-gate * (peer tcp in at least ESTABLISHED state). 19670Sstevel@tonic-gate */ 19680Sstevel@tonic-gate conn_t * 19690Sstevel@tonic-gate ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcph_t *tcph) 19700Sstevel@tonic-gate { 19710Sstevel@tonic-gate uint32_t ports; 19720Sstevel@tonic-gate uint16_t *pports = (uint16_t *)&ports; 19730Sstevel@tonic-gate connf_t *connfp; 19740Sstevel@tonic-gate conn_t *tconnp; 19750Sstevel@tonic-gate boolean_t zone_chk; 19760Sstevel@tonic-gate 19770Sstevel@tonic-gate /* 19780Sstevel@tonic-gate * If either the source of destination address is loopback, then 19790Sstevel@tonic-gate * both endpoints must be in the same Zone. Otherwise, both of 19800Sstevel@tonic-gate * the addresses are system-wide unique (tcp is in ESTABLISHED 19810Sstevel@tonic-gate * state) and the endpoints may reside in different Zones. 19820Sstevel@tonic-gate */ 19830Sstevel@tonic-gate zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) || 19840Sstevel@tonic-gate ipha->ipha_dst == htonl(INADDR_LOOPBACK)); 19850Sstevel@tonic-gate 19860Sstevel@tonic-gate bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 19870Sstevel@tonic-gate bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 19880Sstevel@tonic-gate 19890Sstevel@tonic-gate connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, ports)]; 19900Sstevel@tonic-gate 19910Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 19920Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 19930Sstevel@tonic-gate tconnp = tconnp->conn_next) { 19940Sstevel@tonic-gate 19950Sstevel@tonic-gate if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 19960Sstevel@tonic-gate ipha->ipha_dst, ipha->ipha_src, ports) && 19970Sstevel@tonic-gate tconnp->conn_tcp->tcp_state >= TCPS_ESTABLISHED && 19980Sstevel@tonic-gate (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 19990Sstevel@tonic-gate 20000Sstevel@tonic-gate ASSERT(tconnp != connp); 
20010Sstevel@tonic-gate CONN_INC_REF(tconnp); 20020Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 20030Sstevel@tonic-gate return (tconnp); 20040Sstevel@tonic-gate } 20050Sstevel@tonic-gate } 20060Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 20070Sstevel@tonic-gate return (NULL); 20080Sstevel@tonic-gate } 20090Sstevel@tonic-gate 20100Sstevel@tonic-gate /* 20110Sstevel@tonic-gate * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on 20120Sstevel@tonic-gate * the {src, dst, lport, fport} quadruplet. Returns with conn reference 20130Sstevel@tonic-gate * held; caller must call CONN_DEC_REF. Only checks for connected entries 20140Sstevel@tonic-gate * (peer tcp in at least ESTABLISHED state). 20150Sstevel@tonic-gate */ 20160Sstevel@tonic-gate conn_t * 20170Sstevel@tonic-gate ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcph_t *tcph) 20180Sstevel@tonic-gate { 20190Sstevel@tonic-gate uint32_t ports; 20200Sstevel@tonic-gate uint16_t *pports = (uint16_t *)&ports; 20210Sstevel@tonic-gate connf_t *connfp; 20220Sstevel@tonic-gate conn_t *tconnp; 20230Sstevel@tonic-gate boolean_t zone_chk; 20240Sstevel@tonic-gate 20250Sstevel@tonic-gate /* 20260Sstevel@tonic-gate * If either the source of destination address is loopback, then 20270Sstevel@tonic-gate * both endpoints must be in the same Zone. Otherwise, both of 20280Sstevel@tonic-gate * the addresses are system-wide unique (tcp is in ESTABLISHED 20290Sstevel@tonic-gate * state) and the endpoints may reside in different Zones. We 20300Sstevel@tonic-gate * don't do Zone check for link local address(es) because the 20310Sstevel@tonic-gate * current Zone implementation treats each link local address as 20320Sstevel@tonic-gate * being unique per system node, i.e. they belong to global Zone. 
20330Sstevel@tonic-gate */ 20340Sstevel@tonic-gate zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) || 20350Sstevel@tonic-gate IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)); 20360Sstevel@tonic-gate 20370Sstevel@tonic-gate bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 20380Sstevel@tonic-gate bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 20390Sstevel@tonic-gate 20400Sstevel@tonic-gate connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, ports)]; 20410Sstevel@tonic-gate 20420Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 20430Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 20440Sstevel@tonic-gate tconnp = tconnp->conn_next) { 20450Sstevel@tonic-gate 20460Sstevel@tonic-gate /* We skip tcp_bound_if check here as this is loopback tcp */ 20470Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 20480Sstevel@tonic-gate ip6h->ip6_dst, ip6h->ip6_src, ports) && 20490Sstevel@tonic-gate tconnp->conn_tcp->tcp_state >= TCPS_ESTABLISHED && 20500Sstevel@tonic-gate (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 20510Sstevel@tonic-gate 20520Sstevel@tonic-gate ASSERT(tconnp != connp); 20530Sstevel@tonic-gate CONN_INC_REF(tconnp); 20540Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 20550Sstevel@tonic-gate return (tconnp); 20560Sstevel@tonic-gate } 20570Sstevel@tonic-gate } 20580Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 20590Sstevel@tonic-gate return (NULL); 20600Sstevel@tonic-gate } 20610Sstevel@tonic-gate 20620Sstevel@tonic-gate /* 20630Sstevel@tonic-gate * Find an exact {src, dst, lport, fport} match for a bounced datagram. 20640Sstevel@tonic-gate * Returns with conn reference held. Caller must call CONN_DEC_REF. 20650Sstevel@tonic-gate * Only checks for connected entries i.e. no INADDR_ANY checks. 
20660Sstevel@tonic-gate */ 20670Sstevel@tonic-gate conn_t * 20680Sstevel@tonic-gate ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcph_t *tcph, int min_state) 20690Sstevel@tonic-gate { 20700Sstevel@tonic-gate uint32_t ports; 20710Sstevel@tonic-gate uint16_t *pports; 20720Sstevel@tonic-gate connf_t *connfp; 20730Sstevel@tonic-gate conn_t *tconnp; 20740Sstevel@tonic-gate 20750Sstevel@tonic-gate pports = (uint16_t *)&ports; 20760Sstevel@tonic-gate bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 20770Sstevel@tonic-gate bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 20780Sstevel@tonic-gate 20790Sstevel@tonic-gate connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, ports)]; 20800Sstevel@tonic-gate 20810Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 20820Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 20830Sstevel@tonic-gate tconnp = tconnp->conn_next) { 20840Sstevel@tonic-gate 20850Sstevel@tonic-gate if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 20860Sstevel@tonic-gate ipha->ipha_dst, ipha->ipha_src, ports) && 20870Sstevel@tonic-gate tconnp->conn_tcp->tcp_state >= min_state) { 20880Sstevel@tonic-gate 20890Sstevel@tonic-gate CONN_INC_REF(tconnp); 20900Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 20910Sstevel@tonic-gate return (tconnp); 20920Sstevel@tonic-gate } 20930Sstevel@tonic-gate } 20940Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 20950Sstevel@tonic-gate return (NULL); 20960Sstevel@tonic-gate } 20970Sstevel@tonic-gate 20980Sstevel@tonic-gate /* 20990Sstevel@tonic-gate * Find an exact {src, dst, lport, fport} match for a bounced datagram. 21000Sstevel@tonic-gate * Returns with conn reference held. Caller must call CONN_DEC_REF. 21010Sstevel@tonic-gate * Only checks for connected entries i.e. no INADDR_ANY checks. 21020Sstevel@tonic-gate * Match on ifindex in addition to addresses. 
21030Sstevel@tonic-gate */ 21040Sstevel@tonic-gate conn_t * 21050Sstevel@tonic-gate ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state, 21060Sstevel@tonic-gate uint_t ifindex) 21070Sstevel@tonic-gate { 21080Sstevel@tonic-gate tcp_t *tcp; 21090Sstevel@tonic-gate uint32_t ports; 21100Sstevel@tonic-gate uint16_t *pports; 21110Sstevel@tonic-gate connf_t *connfp; 21120Sstevel@tonic-gate conn_t *tconnp; 21130Sstevel@tonic-gate 21140Sstevel@tonic-gate pports = (uint16_t *)&ports; 21150Sstevel@tonic-gate pports[0] = tcpha->tha_fport; 21160Sstevel@tonic-gate pports[1] = tcpha->tha_lport; 21170Sstevel@tonic-gate 21180Sstevel@tonic-gate connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, ports)]; 21190Sstevel@tonic-gate 21200Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 21210Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 21220Sstevel@tonic-gate tconnp = tconnp->conn_next) { 21230Sstevel@tonic-gate 21240Sstevel@tonic-gate tcp = tconnp->conn_tcp; 21250Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 21260Sstevel@tonic-gate ip6h->ip6_dst, ip6h->ip6_src, ports) && 21270Sstevel@tonic-gate tcp->tcp_state >= min_state && 21280Sstevel@tonic-gate (tcp->tcp_bound_if == 0 || 21290Sstevel@tonic-gate tcp->tcp_bound_if == ifindex)) { 21300Sstevel@tonic-gate 21310Sstevel@tonic-gate CONN_INC_REF(tconnp); 21320Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 21330Sstevel@tonic-gate return (tconnp); 21340Sstevel@tonic-gate } 21350Sstevel@tonic-gate } 21360Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 21370Sstevel@tonic-gate return (NULL); 21380Sstevel@tonic-gate } 21390Sstevel@tonic-gate 21400Sstevel@tonic-gate /* 2141*1676Sjpk * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate 2142*1676Sjpk * a listener when changing state. 
21430Sstevel@tonic-gate */ 21440Sstevel@tonic-gate conn_t * 21450Sstevel@tonic-gate ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid) 21460Sstevel@tonic-gate { 21470Sstevel@tonic-gate connf_t *bind_connfp; 21480Sstevel@tonic-gate conn_t *connp; 21490Sstevel@tonic-gate tcp_t *tcp; 21500Sstevel@tonic-gate 21510Sstevel@tonic-gate /* 21520Sstevel@tonic-gate * Avoid false matches for packets sent to an IP destination of 21530Sstevel@tonic-gate * all zeros. 21540Sstevel@tonic-gate */ 21550Sstevel@tonic-gate if (laddr == 0) 21560Sstevel@tonic-gate return (NULL); 21570Sstevel@tonic-gate 2158*1676Sjpk ASSERT(zoneid != ALL_ZONES); 2159*1676Sjpk 21600Sstevel@tonic-gate bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 21610Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 21620Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 21630Sstevel@tonic-gate connp = connp->conn_next) { 21640Sstevel@tonic-gate tcp = connp->conn_tcp; 21650Sstevel@tonic-gate if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) && 21660Sstevel@tonic-gate connp->conn_zoneid == zoneid && 21670Sstevel@tonic-gate (tcp->tcp_listener == NULL)) { 21680Sstevel@tonic-gate CONN_INC_REF(connp); 21690Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 21700Sstevel@tonic-gate return (connp); 21710Sstevel@tonic-gate } 21720Sstevel@tonic-gate } 21730Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 21740Sstevel@tonic-gate return (NULL); 21750Sstevel@tonic-gate } 21760Sstevel@tonic-gate 2177*1676Sjpk /* 2178*1676Sjpk * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate 2179*1676Sjpk * a listener when changing state. 
2180*1676Sjpk */ 21810Sstevel@tonic-gate conn_t * 21820Sstevel@tonic-gate ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex, 21830Sstevel@tonic-gate zoneid_t zoneid) 21840Sstevel@tonic-gate { 21850Sstevel@tonic-gate connf_t *bind_connfp; 21860Sstevel@tonic-gate conn_t *connp = NULL; 21870Sstevel@tonic-gate tcp_t *tcp; 21880Sstevel@tonic-gate 21890Sstevel@tonic-gate /* 21900Sstevel@tonic-gate * Avoid false matches for packets sent to an IP destination of 21910Sstevel@tonic-gate * all zeros. 21920Sstevel@tonic-gate */ 21930Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED(laddr)) 21940Sstevel@tonic-gate return (NULL); 21950Sstevel@tonic-gate 2196*1676Sjpk ASSERT(zoneid != ALL_ZONES); 21970Sstevel@tonic-gate 21980Sstevel@tonic-gate bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 21990Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 22000Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 22010Sstevel@tonic-gate connp = connp->conn_next) { 22020Sstevel@tonic-gate tcp = connp->conn_tcp; 22030Sstevel@tonic-gate if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) && 22040Sstevel@tonic-gate connp->conn_zoneid == zoneid && 22050Sstevel@tonic-gate (tcp->tcp_bound_if == 0 || 22060Sstevel@tonic-gate tcp->tcp_bound_if == ifindex) && 22070Sstevel@tonic-gate tcp->tcp_listener == NULL) { 22080Sstevel@tonic-gate CONN_INC_REF(connp); 22090Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 22100Sstevel@tonic-gate return (connp); 22110Sstevel@tonic-gate } 22120Sstevel@tonic-gate } 22130Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 22140Sstevel@tonic-gate return (NULL); 22150Sstevel@tonic-gate } 22160Sstevel@tonic-gate 2217741Smasputra /* 2218741Smasputra * ipcl_get_next_conn 2219741Smasputra * get the next entry in the conn global list 2220741Smasputra * and put a reference on the next_conn. 2221741Smasputra * decrement the reference on the current conn. 
2222741Smasputra * 2223741Smasputra * This is an iterator based walker function that also provides for 2224741Smasputra * some selection by the caller. It walks through the conn_hash bucket 2225741Smasputra * searching for the next valid connp in the list, and selects connections 2226741Smasputra * that are neither closed nor condemned. It also REFHOLDS the conn 2227741Smasputra * thus ensuring that the conn exists when the caller uses the conn. 2228741Smasputra */ 2229741Smasputra conn_t * 2230741Smasputra ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags) 2231741Smasputra { 2232741Smasputra conn_t *next_connp; 2233741Smasputra 2234741Smasputra if (connfp == NULL) 2235741Smasputra return (NULL); 2236741Smasputra 2237741Smasputra mutex_enter(&connfp->connf_lock); 2238741Smasputra 2239741Smasputra next_connp = (connp == NULL) ? 2240741Smasputra connfp->connf_head : connp->conn_g_next; 2241741Smasputra 2242741Smasputra while (next_connp != NULL) { 2243741Smasputra mutex_enter(&next_connp->conn_lock); 2244741Smasputra if (!(next_connp->conn_flags & conn_flags) || 2245741Smasputra (next_connp->conn_state_flags & 2246741Smasputra (CONN_CONDEMNED | CONN_INCIPIENT))) { 2247741Smasputra /* 2248741Smasputra * This conn has been condemned or 2249741Smasputra * is closing, or the flags don't match 2250741Smasputra */ 2251741Smasputra mutex_exit(&next_connp->conn_lock); 2252741Smasputra next_connp = next_connp->conn_g_next; 2253741Smasputra continue; 2254741Smasputra } 2255741Smasputra CONN_INC_REF_LOCKED(next_connp); 2256741Smasputra mutex_exit(&next_connp->conn_lock); 2257741Smasputra break; 2258741Smasputra } 2259741Smasputra 2260741Smasputra mutex_exit(&connfp->connf_lock); 2261741Smasputra 2262741Smasputra if (connp != NULL) 2263741Smasputra CONN_DEC_REF(connp); 2264741Smasputra 2265741Smasputra return (next_connp); 2266741Smasputra } 2267741Smasputra 22680Sstevel@tonic-gate #ifdef CONN_DEBUG 22690Sstevel@tonic-gate /* 22700Sstevel@tonic-gate * Trace 
of the last NBUF refhold/refrele 22710Sstevel@tonic-gate */ 22720Sstevel@tonic-gate int 22730Sstevel@tonic-gate conn_trace_ref(conn_t *connp) 22740Sstevel@tonic-gate { 22750Sstevel@tonic-gate int last; 22760Sstevel@tonic-gate conn_trace_t *ctb; 22770Sstevel@tonic-gate 22780Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock)); 22790Sstevel@tonic-gate last = connp->conn_trace_last; 22800Sstevel@tonic-gate last++; 22810Sstevel@tonic-gate if (last == CONN_TRACE_MAX) 22820Sstevel@tonic-gate last = 0; 22830Sstevel@tonic-gate 22840Sstevel@tonic-gate ctb = &connp->conn_trace_buf[last]; 22850Sstevel@tonic-gate ctb->ctb_depth = getpcstack(ctb->ctb_stack, IP_STACK_DEPTH); 22860Sstevel@tonic-gate connp->conn_trace_last = last; 22870Sstevel@tonic-gate return (1); 22880Sstevel@tonic-gate } 22890Sstevel@tonic-gate 22900Sstevel@tonic-gate int 22910Sstevel@tonic-gate conn_untrace_ref(conn_t *connp) 22920Sstevel@tonic-gate { 22930Sstevel@tonic-gate int last; 22940Sstevel@tonic-gate conn_trace_t *ctb; 22950Sstevel@tonic-gate 22960Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock)); 22970Sstevel@tonic-gate last = connp->conn_trace_last; 22980Sstevel@tonic-gate last++; 22990Sstevel@tonic-gate if (last == CONN_TRACE_MAX) 23000Sstevel@tonic-gate last = 0; 23010Sstevel@tonic-gate 23020Sstevel@tonic-gate ctb = &connp->conn_trace_buf[last]; 23030Sstevel@tonic-gate ctb->ctb_depth = getpcstack(ctb->ctb_stack, IP_STACK_DEPTH); 23040Sstevel@tonic-gate connp->conn_trace_last = last; 23050Sstevel@tonic-gate return (1); 23060Sstevel@tonic-gate } 23070Sstevel@tonic-gate #endif 2308