10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 51503Sericheng * Common Development and Distribution License (the "License"). 61503Sericheng * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 22*8485SPeter.Memishian@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate 260Sstevel@tonic-gate /* 270Sstevel@tonic-gate * IP PACKET CLASSIFIER 280Sstevel@tonic-gate * 290Sstevel@tonic-gate * The IP packet classifier provides mapping between IP packets and persistent 300Sstevel@tonic-gate * connection state for connection-oriented protocols. It also provides 310Sstevel@tonic-gate * interface for managing connection states. 
320Sstevel@tonic-gate * 330Sstevel@tonic-gate * The connection state is kept in conn_t data structure and contains, among 340Sstevel@tonic-gate * other things: 350Sstevel@tonic-gate * 360Sstevel@tonic-gate * o local/remote address and ports 370Sstevel@tonic-gate * o Transport protocol 380Sstevel@tonic-gate * o squeue for the connection (for TCP only) 390Sstevel@tonic-gate * o reference counter 400Sstevel@tonic-gate * o Connection state 410Sstevel@tonic-gate * o hash table linkage 420Sstevel@tonic-gate * o interface/ire information 430Sstevel@tonic-gate * o credentials 440Sstevel@tonic-gate * o ipsec policy 450Sstevel@tonic-gate * o send and receive functions. 460Sstevel@tonic-gate * o mutex lock. 470Sstevel@tonic-gate * 480Sstevel@tonic-gate * Connections use a reference counting scheme. They are freed when the 490Sstevel@tonic-gate * reference counter drops to zero. A reference is incremented when connection 500Sstevel@tonic-gate * is placed in a list or table, when incoming packet for the connection arrives 510Sstevel@tonic-gate * and when connection is processed via squeue (squeue processing may be 520Sstevel@tonic-gate * asynchronous and the reference protects the connection from being destroyed 530Sstevel@tonic-gate * before its processing is finished). 540Sstevel@tonic-gate * 550Sstevel@tonic-gate * send and receive functions are currently used for TCP only. The send function 560Sstevel@tonic-gate * determines the IP entry point for the packet once it leaves TCP to be sent to 570Sstevel@tonic-gate * the destination address. The receive function is used by IP when the packet 580Sstevel@tonic-gate * should be passed for TCP processing. When a new connection is created these 590Sstevel@tonic-gate * are set to ip_output() and tcp_input() respectively. During the lifetime of 600Sstevel@tonic-gate * the connection the send and receive functions may change depending on the 610Sstevel@tonic-gate * changes in the connection state. 
 * For example, once the connection is bound to
 * an address, the receive function for this connection is set to
 * tcp_conn_request().  This allows incoming SYNs to go directly into the
 * listener SYN processing function without going to tcp_input() first.
 *
 * Classifier uses several hash tables:
 *
 *	ipcl_conn_fanout:	contains all TCP connections in CONNECTED state
 *	ipcl_bind_fanout:	contains all connections in BOUND state
 *	ipcl_proto_fanout:	IPv4 protocol fanout
 *	ipcl_proto_fanout_v6:	IPv6 protocol fanout
 *	ipcl_udp_fanout:	contains all UDP connections
 *	ipcl_globalhash_fanout:	contains all connections
 *
 * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
 * which need to view all existing connections.
 *
 * All tables are protected by per-bucket locks. When both per-bucket lock and
 * connection lock need to be held, the per-bucket lock should be acquired
 * first, followed by the connection lock.
 *
 * All functions doing search in one of these tables increment a reference
 * counter on the connection found (if any). This reference should be dropped
 * when the caller has finished processing the connection.
850Sstevel@tonic-gate * 860Sstevel@tonic-gate * 870Sstevel@tonic-gate * INTERFACES: 880Sstevel@tonic-gate * =========== 890Sstevel@tonic-gate * 900Sstevel@tonic-gate * Connection Lookup: 910Sstevel@tonic-gate * ------------------ 920Sstevel@tonic-gate * 933448Sdh155122 * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, zoneid, ip_stack) 943448Sdh155122 * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, zoneid, ip_stack) 950Sstevel@tonic-gate * 960Sstevel@tonic-gate * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if 970Sstevel@tonic-gate * it can't find any associated connection. If the connection is found, its 980Sstevel@tonic-gate * reference counter is incremented. 990Sstevel@tonic-gate * 1000Sstevel@tonic-gate * mp: mblock, containing packet header. The full header should fit 1010Sstevel@tonic-gate * into a single mblock. It should also contain at least full IP 1020Sstevel@tonic-gate * and TCP or UDP header. 1030Sstevel@tonic-gate * 1040Sstevel@tonic-gate * protocol: Either IPPROTO_TCP or IPPROTO_UDP. 1050Sstevel@tonic-gate * 1060Sstevel@tonic-gate * hdr_len: The size of IP header. It is used to find TCP or UDP header in 1070Sstevel@tonic-gate * the packet. 1080Sstevel@tonic-gate * 1091676Sjpk * zoneid: The zone in which the returned connection must be; the zoneid 1101676Sjpk * corresponding to the ire_zoneid on the IRE located for the 1111676Sjpk * packet's destination address. 1120Sstevel@tonic-gate * 1130Sstevel@tonic-gate * For TCP connections, the lookup order is as follows: 1140Sstevel@tonic-gate * 5-tuple {src, dst, protocol, local port, remote port} 1150Sstevel@tonic-gate * lookup in ipcl_conn_fanout table. 1160Sstevel@tonic-gate * 3-tuple {dst, remote port, protocol} lookup in 1170Sstevel@tonic-gate * ipcl_bind_fanout table. 1180Sstevel@tonic-gate * 1190Sstevel@tonic-gate * For UDP connections, a 5-tuple {src, dst, protocol, local port, 1200Sstevel@tonic-gate * remote port} lookup is done on ipcl_udp_fanout. 
 *	Note that these interfaces do not handle cases where a packet belongs
 *	to multiple UDP clients, which is handled in IP itself.
 *
 * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
 * determine which actual zone gets the segment.  This is used only in a
 * labeled environment.  The matching rules are:
 *
 *	- If it's not a multilevel port, then the label on the packet selects
 *	  the zone.  Unlabeled packets are delivered to the global zone.
 *
 *	- If it's a multilevel port, then only the zone registered to receive
 *	  packets on that port matches.
 *
 * Also, in a labeled environment, packet labels need to be checked.  For fully
 * bound TCP connections, we can assume that the packet label was checked
 * during connection establishment, and doesn't need to be checked on each
 * packet.  For others, though, we need to check for strict equality or, for
 * multilevel ports, membership in the range or set.  This part currently does
 * a tnrh lookup on each packet, but could be optimized to use cached results
 * if that were necessary.  (SCTP doesn't come through here, but if it did,
 * we would apply the same rules as TCP.)
 *
 * An implication of the above is that fully-bound TCP sockets must always use
 * distinct 4-tuples; they can't be discriminated by label alone.
 *
 * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
 * as there's no connection set-up handshake and no shared state.
 *
 * Labels on looped-back packets within a single zone do not need to be
 * checked, as all processes in the same zone have the same label.
 *
 * Finally, for unlabeled packets received by a labeled system, special rules
 * apply.  We consider only the MLP if there is one.  Otherwise, we prefer a
 * socket in the zone whose label matches the default label of the sender, if
 * any.  In any event, the receiving socket must have SO_MAC_EXEMPT set and the
 * receiver's label must dominate the sender's default label.
 *
 * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcph_t *, int, ip_stack);
 * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
 *					 ip_stack);
 *
 *	Lookup routine to find an exact match for {src, dst, local port,
 *	remote port} for TCP connections in ipcl_conn_fanout. The address and
 *	ports are read from the IP and TCP header respectively.
 *
 * conn_t	*ipcl_lookup_listener_v4(lport, laddr, protocol,
 *					 zoneid, ip_stack);
 * conn_t	*ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
 *					 zoneid, ip_stack);
 *
 *	Lookup routine to find a listener with the tuple {lport, laddr,
 *	protocol} in the ipcl_bind_fanout table. For IPv6, an additional
 *	parameter interface index is also compared.
 *
 * void ipcl_walk(func, arg, ip_stack)
 *
 *	Apply 'func' to every connection available. The 'func' is called as
 *	(*func)(connp, arg). The walk is non-atomic so connections may be
 *	created and destroyed during the walk.
 *	The CONN_CONDEMNED and
 *	CONN_INCIPIENT flags ensure that connections which are newly created
 *	or being destroyed are not selected by the walker.
 *
 * Table Updates
 * -------------
 *
 * int ipcl_conn_insert(connp, protocol, src, dst, ports)
 * int ipcl_conn_insert_v6(connp, protocol, src, dst, ports, ifindex)
 *
 *	Insert 'connp' in the ipcl_conn_fanout.
 *	Arguments :
 *		connp		conn_t to be inserted
 *		protocol	connection protocol
 *		src		source address
 *		dst		destination address
 *		ports		local and remote port
 *		ifindex		interface index for IPv6 connections
 *
 *	Return value :
 *		0		if connp was inserted
 *		EADDRINUSE	if the connection with the same tuple
 *				already exists.
 *
 * int ipcl_bind_insert(connp, protocol, src, lport);
 * int ipcl_bind_insert_v6(connp, protocol, src, lport);
 *
 *	Insert 'connp' in ipcl_bind_fanout.
 *	Arguments :
 *		connp		conn_t to be inserted
 *		protocol	connection protocol
 *		src		source address connection wants
 *				to bind to
 *		lport		local port connection wants to
 *				bind to
 *
 *
 * void ipcl_hash_remove(connp);
 *
 *	Removes the 'connp' from the connection fanout table.
2190Sstevel@tonic-gate * 2200Sstevel@tonic-gate * Connection Creation/Destruction 2210Sstevel@tonic-gate * ------------------------------- 2220Sstevel@tonic-gate * 2233448Sdh155122 * conn_t *ipcl_conn_create(type, sleep, netstack_t *) 2240Sstevel@tonic-gate * 2250Sstevel@tonic-gate * Creates a new conn based on the type flag, inserts it into 2260Sstevel@tonic-gate * globalhash table. 2270Sstevel@tonic-gate * 2280Sstevel@tonic-gate * type: This flag determines the type of conn_t which needs to be 2295240Snordmark * created i.e., which kmem_cache it comes from. 2300Sstevel@tonic-gate * IPCL_TCPCONN indicates a TCP connection 2315240Snordmark * IPCL_SCTPCONN indicates a SCTP connection 2325240Snordmark * IPCL_UDPCONN indicates a UDP conn_t. 2335240Snordmark * IPCL_RAWIPCONN indicates a RAWIP/ICMP conn_t. 2345240Snordmark * IPCL_RTSCONN indicates a RTS conn_t. 2355240Snordmark * IPCL_IPCCONN indicates all other connections. 2360Sstevel@tonic-gate * 2370Sstevel@tonic-gate * void ipcl_conn_destroy(connp) 2380Sstevel@tonic-gate * 2390Sstevel@tonic-gate * Destroys the connection state, removes it from the global 2400Sstevel@tonic-gate * connection hash table and frees its memory. 
2410Sstevel@tonic-gate */ 2420Sstevel@tonic-gate 2430Sstevel@tonic-gate #include <sys/types.h> 2440Sstevel@tonic-gate #include <sys/stream.h> 2450Sstevel@tonic-gate #include <sys/stropts.h> 2460Sstevel@tonic-gate #include <sys/sysmacros.h> 2470Sstevel@tonic-gate #include <sys/strsubr.h> 2480Sstevel@tonic-gate #include <sys/strsun.h> 2490Sstevel@tonic-gate #define _SUN_TPI_VERSION 2 2500Sstevel@tonic-gate #include <sys/ddi.h> 2510Sstevel@tonic-gate #include <sys/cmn_err.h> 2520Sstevel@tonic-gate #include <sys/debug.h> 2530Sstevel@tonic-gate 2540Sstevel@tonic-gate #include <sys/systm.h> 2550Sstevel@tonic-gate #include <sys/param.h> 2560Sstevel@tonic-gate #include <sys/kmem.h> 2570Sstevel@tonic-gate #include <sys/isa_defs.h> 2580Sstevel@tonic-gate #include <inet/common.h> 2590Sstevel@tonic-gate #include <netinet/ip6.h> 2600Sstevel@tonic-gate #include <netinet/icmp6.h> 2610Sstevel@tonic-gate 2620Sstevel@tonic-gate #include <inet/ip.h> 2630Sstevel@tonic-gate #include <inet/ip6.h> 2640Sstevel@tonic-gate #include <inet/ip_ndp.h> 2658348SEric.Yu@Sun.COM #include <inet/ip_impl.h> 266741Smasputra #include <inet/udp_impl.h> 2670Sstevel@tonic-gate #include <inet/sctp_ip.h> 2683448Sdh155122 #include <inet/sctp/sctp_impl.h> 2695240Snordmark #include <inet/rawip_impl.h> 2705240Snordmark #include <inet/rts_impl.h> 2710Sstevel@tonic-gate 2720Sstevel@tonic-gate #include <sys/cpuvar.h> 2730Sstevel@tonic-gate 2740Sstevel@tonic-gate #include <inet/ipclassifier.h> 2758348SEric.Yu@Sun.COM #include <inet/tcp.h> 2760Sstevel@tonic-gate #include <inet/ipsec_impl.h> 2770Sstevel@tonic-gate 2781676Sjpk #include <sys/tsol/tnet.h> 2798348SEric.Yu@Sun.COM #include <sys/sockio.h> 2801676Sjpk 2810Sstevel@tonic-gate #ifdef DEBUG 2820Sstevel@tonic-gate #define IPCL_DEBUG 2830Sstevel@tonic-gate #else 2840Sstevel@tonic-gate #undef IPCL_DEBUG 2850Sstevel@tonic-gate #endif 2860Sstevel@tonic-gate 2870Sstevel@tonic-gate #ifdef IPCL_DEBUG 2880Sstevel@tonic-gate int ipcl_debug_level = 0; 
2890Sstevel@tonic-gate #define IPCL_DEBUG_LVL(level, args) \ 2900Sstevel@tonic-gate if (ipcl_debug_level & level) { printf args; } 2910Sstevel@tonic-gate #else 2920Sstevel@tonic-gate #define IPCL_DEBUG_LVL(level, args) {; } 2930Sstevel@tonic-gate #endif 2943448Sdh155122 /* Old value for compatibility. Setable in /etc/system */ 2950Sstevel@tonic-gate uint_t tcp_conn_hash_size = 0; 2960Sstevel@tonic-gate 2973448Sdh155122 /* New value. Zero means choose automatically. Setable in /etc/system */ 2980Sstevel@tonic-gate uint_t ipcl_conn_hash_size = 0; 2990Sstevel@tonic-gate uint_t ipcl_conn_hash_memfactor = 8192; 3000Sstevel@tonic-gate uint_t ipcl_conn_hash_maxsize = 82500; 3010Sstevel@tonic-gate 3020Sstevel@tonic-gate /* bind/udp fanout table size */ 3030Sstevel@tonic-gate uint_t ipcl_bind_fanout_size = 512; 3041503Sericheng uint_t ipcl_udp_fanout_size = 16384; 3050Sstevel@tonic-gate 3060Sstevel@tonic-gate /* Raw socket fanout size. Must be a power of 2. */ 3070Sstevel@tonic-gate uint_t ipcl_raw_fanout_size = 256; 3080Sstevel@tonic-gate 3090Sstevel@tonic-gate /* 3100Sstevel@tonic-gate * Power of 2^N Primes useful for hashing for N of 0-28, 3110Sstevel@tonic-gate * these primes are the nearest prime <= 2^N - 2^(N-2). 3120Sstevel@tonic-gate */ 3130Sstevel@tonic-gate 3140Sstevel@tonic-gate #define P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067, \ 3150Sstevel@tonic-gate 6143, 12281, 24571, 49139, 98299, 196597, 393209, \ 3160Sstevel@tonic-gate 786431, 1572853, 3145721, 6291449, 12582893, 25165813, \ 3170Sstevel@tonic-gate 50331599, 100663291, 201326557, 0} 3180Sstevel@tonic-gate 3190Sstevel@tonic-gate /* 3205240Snordmark * wrapper structure to ensure that conn and what follows it (tcp_t, etc) 3215240Snordmark * are aligned on cache lines. 
3220Sstevel@tonic-gate */ 3235240Snordmark typedef union itc_s { 3245240Snordmark conn_t itc_conn; 3255240Snordmark char itcu_filler[CACHE_ALIGN(conn_s)]; 3260Sstevel@tonic-gate } itc_t; 3270Sstevel@tonic-gate 3285240Snordmark struct kmem_cache *tcp_conn_cache; 3295240Snordmark struct kmem_cache *ip_conn_cache; 3308348SEric.Yu@Sun.COM struct kmem_cache *ip_helper_stream_cache; 3310Sstevel@tonic-gate extern struct kmem_cache *sctp_conn_cache; 3320Sstevel@tonic-gate extern struct kmem_cache *tcp_sack_info_cache; 3330Sstevel@tonic-gate extern struct kmem_cache *tcp_iphc_cache; 3345240Snordmark struct kmem_cache *udp_conn_cache; 3355240Snordmark struct kmem_cache *rawip_conn_cache; 3365240Snordmark struct kmem_cache *rts_conn_cache; 3370Sstevel@tonic-gate 3380Sstevel@tonic-gate extern void tcp_timermp_free(tcp_t *); 3390Sstevel@tonic-gate extern mblk_t *tcp_timermp_alloc(int); 3400Sstevel@tonic-gate 3415240Snordmark static int ip_conn_constructor(void *, void *, int); 3425240Snordmark static void ip_conn_destructor(void *, void *); 3435240Snordmark 3445240Snordmark static int tcp_conn_constructor(void *, void *, int); 3455240Snordmark static void tcp_conn_destructor(void *, void *); 3465240Snordmark 3475240Snordmark static int udp_conn_constructor(void *, void *, int); 3485240Snordmark static void udp_conn_destructor(void *, void *); 3495240Snordmark 3505240Snordmark static int rawip_conn_constructor(void *, void *, int); 3515240Snordmark static void rawip_conn_destructor(void *, void *); 3525240Snordmark 3535240Snordmark static int rts_conn_constructor(void *, void *, int); 3545240Snordmark static void rts_conn_destructor(void *, void *); 3550Sstevel@tonic-gate 3568348SEric.Yu@Sun.COM static int ip_helper_stream_constructor(void *, void *, int); 3578348SEric.Yu@Sun.COM static void ip_helper_stream_destructor(void *, void *); 3588348SEric.Yu@Sun.COM 3598348SEric.Yu@Sun.COM boolean_t ip_use_helper_cache = B_TRUE; 3608348SEric.Yu@Sun.COM 3618392SHuafeng.Lv@Sun.COM /* 
3628392SHuafeng.Lv@Sun.COM * Hook functions to enable cluster networking 3638392SHuafeng.Lv@Sun.COM * On non-clustered systems these vectors must always be NULL. 3648392SHuafeng.Lv@Sun.COM */ 3658392SHuafeng.Lv@Sun.COM extern void (*cl_inet_listen)(netstackid_t, uint8_t, sa_family_t, 3668392SHuafeng.Lv@Sun.COM uint8_t *, in_port_t, void *); 3678392SHuafeng.Lv@Sun.COM extern void (*cl_inet_unlisten)(netstackid_t, uint8_t, sa_family_t, 3688392SHuafeng.Lv@Sun.COM uint8_t *, in_port_t, void *); 3698392SHuafeng.Lv@Sun.COM 3700Sstevel@tonic-gate #ifdef IPCL_DEBUG 3710Sstevel@tonic-gate #define INET_NTOA_BUFSIZE 18 3720Sstevel@tonic-gate 3730Sstevel@tonic-gate static char * 3740Sstevel@tonic-gate inet_ntoa_r(uint32_t in, char *b) 3750Sstevel@tonic-gate { 3760Sstevel@tonic-gate unsigned char *p; 3770Sstevel@tonic-gate 3780Sstevel@tonic-gate p = (unsigned char *)∈ 3790Sstevel@tonic-gate (void) sprintf(b, "%d.%d.%d.%d", p[0], p[1], p[2], p[3]); 3800Sstevel@tonic-gate return (b); 3810Sstevel@tonic-gate } 3820Sstevel@tonic-gate #endif 3830Sstevel@tonic-gate 3840Sstevel@tonic-gate /* 3853448Sdh155122 * Global (for all stack instances) init routine 3860Sstevel@tonic-gate */ 3870Sstevel@tonic-gate void 3883448Sdh155122 ipcl_g_init(void) 3890Sstevel@tonic-gate { 3905240Snordmark ip_conn_cache = kmem_cache_create("ip_conn_cache", 3910Sstevel@tonic-gate sizeof (conn_t), CACHE_ALIGN_SIZE, 3925240Snordmark ip_conn_constructor, ip_conn_destructor, 3935240Snordmark NULL, NULL, NULL, 0); 3945240Snordmark 3955240Snordmark tcp_conn_cache = kmem_cache_create("tcp_conn_cache", 3965240Snordmark sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE, 3975240Snordmark tcp_conn_constructor, tcp_conn_destructor, 3985240Snordmark NULL, NULL, NULL, 0); 3990Sstevel@tonic-gate 4005240Snordmark udp_conn_cache = kmem_cache_create("udp_conn_cache", 4015240Snordmark sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE, 4025240Snordmark udp_conn_constructor, udp_conn_destructor, 4035240Snordmark NULL, NULL, NULL, 
0); 4045240Snordmark 4055240Snordmark rawip_conn_cache = kmem_cache_create("rawip_conn_cache", 4065240Snordmark sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE, 4075240Snordmark rawip_conn_constructor, rawip_conn_destructor, 4085240Snordmark NULL, NULL, NULL, 0); 4095240Snordmark 4105240Snordmark rts_conn_cache = kmem_cache_create("rts_conn_cache", 4115240Snordmark sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE, 4125240Snordmark rts_conn_constructor, rts_conn_destructor, 4130Sstevel@tonic-gate NULL, NULL, NULL, 0); 4148348SEric.Yu@Sun.COM 4158348SEric.Yu@Sun.COM if (ip_use_helper_cache) { 4168348SEric.Yu@Sun.COM ip_helper_stream_cache = kmem_cache_create 4178348SEric.Yu@Sun.COM ("ip_helper_stream_cache", sizeof (ip_helper_stream_info_t), 4188348SEric.Yu@Sun.COM CACHE_ALIGN_SIZE, ip_helper_stream_constructor, 4198348SEric.Yu@Sun.COM ip_helper_stream_destructor, NULL, NULL, NULL, 0); 4208348SEric.Yu@Sun.COM } else { 4218348SEric.Yu@Sun.COM ip_helper_stream_cache = NULL; 4228348SEric.Yu@Sun.COM } 4233448Sdh155122 } 4243448Sdh155122 4253448Sdh155122 /* 4263448Sdh155122 * ipclassifier intialization routine, sets up hash tables. 
4273448Sdh155122 */ 4283448Sdh155122 void 4293448Sdh155122 ipcl_init(ip_stack_t *ipst) 4303448Sdh155122 { 4313448Sdh155122 int i; 4323448Sdh155122 int sizes[] = P2Ps(); 4330Sstevel@tonic-gate 4340Sstevel@tonic-gate /* 4353448Sdh155122 * Calculate size of conn fanout table from /etc/system settings 4360Sstevel@tonic-gate */ 4370Sstevel@tonic-gate if (ipcl_conn_hash_size != 0) { 4383448Sdh155122 ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size; 4390Sstevel@tonic-gate } else if (tcp_conn_hash_size != 0) { 4403448Sdh155122 ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size; 4410Sstevel@tonic-gate } else { 4420Sstevel@tonic-gate extern pgcnt_t freemem; 4430Sstevel@tonic-gate 4443448Sdh155122 ipst->ips_ipcl_conn_fanout_size = 4450Sstevel@tonic-gate (freemem * PAGESIZE) / ipcl_conn_hash_memfactor; 4460Sstevel@tonic-gate 4473448Sdh155122 if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) { 4483448Sdh155122 ipst->ips_ipcl_conn_fanout_size = 4493448Sdh155122 ipcl_conn_hash_maxsize; 4503448Sdh155122 } 4510Sstevel@tonic-gate } 4520Sstevel@tonic-gate 4530Sstevel@tonic-gate for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) { 4543448Sdh155122 if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) { 4550Sstevel@tonic-gate break; 4560Sstevel@tonic-gate } 4570Sstevel@tonic-gate } 4583448Sdh155122 if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) { 4590Sstevel@tonic-gate /* Out of range, use the 2^16 value */ 4603448Sdh155122 ipst->ips_ipcl_conn_fanout_size = sizes[16]; 4610Sstevel@tonic-gate } 4623448Sdh155122 4633448Sdh155122 /* Take values from /etc/system */ 4643448Sdh155122 ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size; 4653448Sdh155122 ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size; 4663448Sdh155122 ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size; 4670Sstevel@tonic-gate 4683448Sdh155122 ASSERT(ipst->ips_ipcl_conn_fanout == NULL); 4693448Sdh155122 4703448Sdh155122 ipst->ips_ipcl_conn_fanout = kmem_zalloc( 4713448Sdh155122 
ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP); 4723448Sdh155122 4733448Sdh155122 for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) { 4743448Sdh155122 mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL, 4750Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4760Sstevel@tonic-gate } 4770Sstevel@tonic-gate 4783448Sdh155122 ipst->ips_ipcl_bind_fanout = kmem_zalloc( 4793448Sdh155122 ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP); 4800Sstevel@tonic-gate 4813448Sdh155122 for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) { 4823448Sdh155122 mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL, 4830Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4840Sstevel@tonic-gate } 4850Sstevel@tonic-gate 4863448Sdh155122 ipst->ips_ipcl_proto_fanout = kmem_zalloc(IPPROTO_MAX * 4873448Sdh155122 sizeof (connf_t), KM_SLEEP); 4883448Sdh155122 for (i = 0; i < IPPROTO_MAX; i++) { 4893448Sdh155122 mutex_init(&ipst->ips_ipcl_proto_fanout[i].connf_lock, NULL, 4900Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4910Sstevel@tonic-gate } 4923448Sdh155122 4933448Sdh155122 ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX * 4943448Sdh155122 sizeof (connf_t), KM_SLEEP); 4953448Sdh155122 for (i = 0; i < IPPROTO_MAX; i++) { 4963448Sdh155122 mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL, 4970Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4980Sstevel@tonic-gate } 4990Sstevel@tonic-gate 5003448Sdh155122 ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP); 5013448Sdh155122 mutex_init(&ipst->ips_rts_clients->connf_lock, 5023448Sdh155122 NULL, MUTEX_DEFAULT, NULL); 5030Sstevel@tonic-gate 5043448Sdh155122 ipst->ips_ipcl_udp_fanout = kmem_zalloc( 5053448Sdh155122 ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP); 5063448Sdh155122 for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) { 5073448Sdh155122 mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL, 5080Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 5090Sstevel@tonic-gate } 
5100Sstevel@tonic-gate 5113448Sdh155122 ipst->ips_ipcl_raw_fanout = kmem_zalloc( 5123448Sdh155122 ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP); 5133448Sdh155122 for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { 5143448Sdh155122 mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL, 5150Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 5160Sstevel@tonic-gate } 5170Sstevel@tonic-gate 5183448Sdh155122 ipst->ips_ipcl_globalhash_fanout = kmem_zalloc( 5193448Sdh155122 sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP); 5200Sstevel@tonic-gate for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5213448Sdh155122 mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock, 5223448Sdh155122 NULL, MUTEX_DEFAULT, NULL); 5230Sstevel@tonic-gate } 5240Sstevel@tonic-gate } 5250Sstevel@tonic-gate 5260Sstevel@tonic-gate void 5273448Sdh155122 ipcl_g_destroy(void) 5280Sstevel@tonic-gate { 5295240Snordmark kmem_cache_destroy(ip_conn_cache); 5305240Snordmark kmem_cache_destroy(tcp_conn_cache); 5315240Snordmark kmem_cache_destroy(udp_conn_cache); 5325240Snordmark kmem_cache_destroy(rawip_conn_cache); 5335240Snordmark kmem_cache_destroy(rts_conn_cache); 5343448Sdh155122 } 5353448Sdh155122 5363448Sdh155122 /* 5373448Sdh155122 * All user-level and kernel use of the stack must be gone 5383448Sdh155122 * by now. 
5393448Sdh155122 */ 5403448Sdh155122 void 5413448Sdh155122 ipcl_destroy(ip_stack_t *ipst) 5423448Sdh155122 { 5433448Sdh155122 int i; 5443448Sdh155122 5453448Sdh155122 for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) { 5463448Sdh155122 ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL); 5473448Sdh155122 mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock); 5483448Sdh155122 } 5493448Sdh155122 kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size * 5503448Sdh155122 sizeof (connf_t)); 5513448Sdh155122 ipst->ips_ipcl_conn_fanout = NULL; 5523448Sdh155122 5533448Sdh155122 for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) { 5543448Sdh155122 ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL); 5553448Sdh155122 mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock); 5563448Sdh155122 } 5573448Sdh155122 kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size * 5583448Sdh155122 sizeof (connf_t)); 5593448Sdh155122 ipst->ips_ipcl_bind_fanout = NULL; 5603448Sdh155122 5613448Sdh155122 for (i = 0; i < IPPROTO_MAX; i++) { 5623448Sdh155122 ASSERT(ipst->ips_ipcl_proto_fanout[i].connf_head == NULL); 5633448Sdh155122 mutex_destroy(&ipst->ips_ipcl_proto_fanout[i].connf_lock); 5643448Sdh155122 } 5653448Sdh155122 kmem_free(ipst->ips_ipcl_proto_fanout, IPPROTO_MAX * sizeof (connf_t)); 5663448Sdh155122 ipst->ips_ipcl_proto_fanout = NULL; 5670Sstevel@tonic-gate 5683448Sdh155122 for (i = 0; i < IPPROTO_MAX; i++) { 5693448Sdh155122 ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL); 5703448Sdh155122 mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock); 5713448Sdh155122 } 5723448Sdh155122 kmem_free(ipst->ips_ipcl_proto_fanout_v6, 5733448Sdh155122 IPPROTO_MAX * sizeof (connf_t)); 5743448Sdh155122 ipst->ips_ipcl_proto_fanout_v6 = NULL; 5753448Sdh155122 5763448Sdh155122 for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) { 5773448Sdh155122 ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL); 5783448Sdh155122 
mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock); 5793448Sdh155122 } 5803448Sdh155122 kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size * 5813448Sdh155122 sizeof (connf_t)); 5823448Sdh155122 ipst->ips_ipcl_udp_fanout = NULL; 5830Sstevel@tonic-gate 5843448Sdh155122 for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { 5853448Sdh155122 ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL); 5863448Sdh155122 mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock); 5873448Sdh155122 } 5883448Sdh155122 kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size * 5893448Sdh155122 sizeof (connf_t)); 5903448Sdh155122 ipst->ips_ipcl_raw_fanout = NULL; 5910Sstevel@tonic-gate 5923448Sdh155122 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5933448Sdh155122 ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL); 5943448Sdh155122 mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 5953448Sdh155122 } 5963448Sdh155122 kmem_free(ipst->ips_ipcl_globalhash_fanout, 5973448Sdh155122 sizeof (connf_t) * CONN_G_HASH_SIZE); 5983448Sdh155122 ipst->ips_ipcl_globalhash_fanout = NULL; 5990Sstevel@tonic-gate 6003448Sdh155122 ASSERT(ipst->ips_rts_clients->connf_head == NULL); 6013448Sdh155122 mutex_destroy(&ipst->ips_rts_clients->connf_lock); 6023448Sdh155122 kmem_free(ipst->ips_rts_clients, sizeof (connf_t)); 6033448Sdh155122 ipst->ips_rts_clients = NULL; 6040Sstevel@tonic-gate } 6050Sstevel@tonic-gate 6060Sstevel@tonic-gate /* 6070Sstevel@tonic-gate * conn creation routine. initialize the conn, sets the reference 6080Sstevel@tonic-gate * and inserts it in the global hash table. 
6090Sstevel@tonic-gate */ 6100Sstevel@tonic-gate conn_t * 6113448Sdh155122 ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns) 6120Sstevel@tonic-gate { 6130Sstevel@tonic-gate conn_t *connp; 6143448Sdh155122 sctp_stack_t *sctps; 6155240Snordmark struct kmem_cache *conn_cache; 6160Sstevel@tonic-gate 6170Sstevel@tonic-gate switch (type) { 6180Sstevel@tonic-gate case IPCL_SCTPCONN: 6190Sstevel@tonic-gate if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL) 6200Sstevel@tonic-gate return (NULL); 6214691Skcpoon sctp_conn_init(connp); 6223448Sdh155122 sctps = ns->netstack_sctp; 6233448Sdh155122 SCTP_G_Q_REFHOLD(sctps); 6243448Sdh155122 netstack_hold(ns); 6253448Sdh155122 connp->conn_netstack = ns; 6265240Snordmark return (connp); 6275240Snordmark 6285240Snordmark case IPCL_TCPCONN: 6295240Snordmark conn_cache = tcp_conn_cache; 6300Sstevel@tonic-gate break; 6315240Snordmark 6325240Snordmark case IPCL_UDPCONN: 6335240Snordmark conn_cache = udp_conn_cache; 6345240Snordmark break; 6355240Snordmark 6365240Snordmark case IPCL_RAWIPCONN: 6375240Snordmark conn_cache = rawip_conn_cache; 6385240Snordmark break; 6395240Snordmark 6405240Snordmark case IPCL_RTSCONN: 6415240Snordmark conn_cache = rts_conn_cache; 6425240Snordmark break; 6435240Snordmark 6440Sstevel@tonic-gate case IPCL_IPCCONN: 6455240Snordmark conn_cache = ip_conn_cache; 6460Sstevel@tonic-gate break; 6475240Snordmark 648741Smasputra default: 649741Smasputra connp = NULL; 650741Smasputra ASSERT(0); 6510Sstevel@tonic-gate } 6520Sstevel@tonic-gate 6535240Snordmark if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL) 6545240Snordmark return (NULL); 6555240Snordmark 6565240Snordmark connp->conn_ref = 1; 6575240Snordmark netstack_hold(ns); 6585240Snordmark connp->conn_netstack = ns; 6595240Snordmark ipcl_globalhash_insert(connp); 6600Sstevel@tonic-gate return (connp); 6610Sstevel@tonic-gate } 6620Sstevel@tonic-gate 6630Sstevel@tonic-gate void 6640Sstevel@tonic-gate ipcl_conn_destroy(conn_t *connp) 
6650Sstevel@tonic-gate { 6660Sstevel@tonic-gate mblk_t *mp; 6673448Sdh155122 netstack_t *ns = connp->conn_netstack; 6680Sstevel@tonic-gate 6690Sstevel@tonic-gate ASSERT(!MUTEX_HELD(&connp->conn_lock)); 6700Sstevel@tonic-gate ASSERT(connp->conn_ref == 0); 6710Sstevel@tonic-gate ASSERT(connp->conn_ire_cache == NULL); 6720Sstevel@tonic-gate 6737502Saruna@cs.umn.edu DTRACE_PROBE1(conn__destroy, conn_t *, connp); 6747502Saruna@cs.umn.edu 6751676Sjpk if (connp->conn_peercred != NULL && 6761676Sjpk connp->conn_peercred != connp->conn_cred) 6771676Sjpk crfree(connp->conn_peercred); 6781676Sjpk connp->conn_peercred = NULL; 6791676Sjpk 6801676Sjpk if (connp->conn_cred != NULL) { 6811676Sjpk crfree(connp->conn_cred); 6821676Sjpk connp->conn_cred = NULL; 6831676Sjpk } 6841676Sjpk 6850Sstevel@tonic-gate ipcl_globalhash_remove(connp); 6860Sstevel@tonic-gate 6875240Snordmark /* FIXME: add separate tcp_conn_free()? */ 6880Sstevel@tonic-gate if (connp->conn_flags & IPCL_TCPCONN) { 689741Smasputra tcp_t *tcp = connp->conn_tcp; 6903448Sdh155122 tcp_stack_t *tcps; 6913448Sdh155122 6923448Sdh155122 ASSERT(tcp != NULL); 6933448Sdh155122 tcps = tcp->tcp_tcps; 6943448Sdh155122 if (tcps != NULL) { 6953448Sdh155122 if (connp->conn_latch != NULL) { 6963448Sdh155122 IPLATCH_REFRELE(connp->conn_latch, ns); 6973448Sdh155122 connp->conn_latch = NULL; 6983448Sdh155122 } 6993448Sdh155122 if (connp->conn_policy != NULL) { 7003448Sdh155122 IPPH_REFRELE(connp->conn_policy, ns); 7013448Sdh155122 connp->conn_policy = NULL; 7023448Sdh155122 } 7033448Sdh155122 tcp->tcp_tcps = NULL; 7043448Sdh155122 TCPS_REFRELE(tcps); 7053448Sdh155122 } 706741Smasputra 7070Sstevel@tonic-gate tcp_free(tcp); 7080Sstevel@tonic-gate mp = tcp->tcp_timercache; 7091676Sjpk tcp->tcp_cred = NULL; 7100Sstevel@tonic-gate 7110Sstevel@tonic-gate if (tcp->tcp_sack_info != NULL) { 7120Sstevel@tonic-gate bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t)); 7130Sstevel@tonic-gate kmem_cache_free(tcp_sack_info_cache, 
7140Sstevel@tonic-gate tcp->tcp_sack_info); 7150Sstevel@tonic-gate } 7160Sstevel@tonic-gate if (tcp->tcp_iphc != NULL) { 7170Sstevel@tonic-gate if (tcp->tcp_hdr_grown) { 7180Sstevel@tonic-gate kmem_free(tcp->tcp_iphc, tcp->tcp_iphc_len); 7190Sstevel@tonic-gate } else { 7200Sstevel@tonic-gate bzero(tcp->tcp_iphc, tcp->tcp_iphc_len); 7210Sstevel@tonic-gate kmem_cache_free(tcp_iphc_cache, tcp->tcp_iphc); 7220Sstevel@tonic-gate } 7230Sstevel@tonic-gate tcp->tcp_iphc_len = 0; 7240Sstevel@tonic-gate } 7250Sstevel@tonic-gate ASSERT(tcp->tcp_iphc_len == 0); 7260Sstevel@tonic-gate 7278014SKacheong.Poon@Sun.COM /* 7288014SKacheong.Poon@Sun.COM * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate 7298014SKacheong.Poon@Sun.COM * the mblk. 7308014SKacheong.Poon@Sun.COM */ 7318014SKacheong.Poon@Sun.COM if (tcp->tcp_rsrv_mp != NULL) { 7328014SKacheong.Poon@Sun.COM freeb(tcp->tcp_rsrv_mp); 7338014SKacheong.Poon@Sun.COM tcp->tcp_rsrv_mp = NULL; 7348014SKacheong.Poon@Sun.COM mutex_destroy(&tcp->tcp_rsrv_mp_lock); 7358014SKacheong.Poon@Sun.COM } 7368014SKacheong.Poon@Sun.COM 7373448Sdh155122 ASSERT(connp->conn_latch == NULL); 7383448Sdh155122 ASSERT(connp->conn_policy == NULL); 7393448Sdh155122 7403448Sdh155122 if (ns != NULL) { 7413448Sdh155122 ASSERT(tcp->tcp_tcps == NULL); 7423448Sdh155122 connp->conn_netstack = NULL; 7433448Sdh155122 netstack_rele(ns); 7443448Sdh155122 } 7455240Snordmark 7465240Snordmark ipcl_conn_cleanup(connp); 7475240Snordmark connp->conn_flags = IPCL_TCPCONN; 7485240Snordmark bzero(tcp, sizeof (tcp_t)); 7495240Snordmark 7505240Snordmark tcp->tcp_timercache = mp; 7515240Snordmark tcp->tcp_connp = connp; 7525240Snordmark kmem_cache_free(tcp_conn_cache, connp); 7535240Snordmark return; 7545240Snordmark } 7555240Snordmark if (connp->conn_latch != NULL) { 7565240Snordmark IPLATCH_REFRELE(connp->conn_latch, connp->conn_netstack); 7575240Snordmark connp->conn_latch = NULL; 7585240Snordmark } 7595240Snordmark if (connp->conn_policy != NULL) { 
7605240Snordmark IPPH_REFRELE(connp->conn_policy, connp->conn_netstack); 7615240Snordmark connp->conn_policy = NULL; 7625240Snordmark } 7635240Snordmark if (connp->conn_ipsec_opt_mp != NULL) { 7645240Snordmark freemsg(connp->conn_ipsec_opt_mp); 7655240Snordmark connp->conn_ipsec_opt_mp = NULL; 7665240Snordmark } 7675240Snordmark 7685240Snordmark if (connp->conn_flags & IPCL_SCTPCONN) { 7693448Sdh155122 ASSERT(ns != NULL); 7700Sstevel@tonic-gate sctp_free(connp); 7715240Snordmark return; 7725240Snordmark } 7735240Snordmark 7745240Snordmark if (ns != NULL) { 7755240Snordmark connp->conn_netstack = NULL; 7765240Snordmark netstack_rele(ns); 7775240Snordmark } 7788348SEric.Yu@Sun.COM 7795240Snordmark ipcl_conn_cleanup(connp); 7805240Snordmark 7815240Snordmark /* leave conn_priv aka conn_udp, conn_icmp, etc in place. */ 7825240Snordmark if (connp->conn_flags & IPCL_UDPCONN) { 7835240Snordmark connp->conn_flags = IPCL_UDPCONN; 7845240Snordmark kmem_cache_free(udp_conn_cache, connp); 7855240Snordmark } else if (connp->conn_flags & IPCL_RAWIPCONN) { 7868348SEric.Yu@Sun.COM 7875240Snordmark connp->conn_flags = IPCL_RAWIPCONN; 7885240Snordmark connp->conn_ulp = IPPROTO_ICMP; 7895240Snordmark kmem_cache_free(rawip_conn_cache, connp); 7905240Snordmark } else if (connp->conn_flags & IPCL_RTSCONN) { 7915240Snordmark connp->conn_flags = IPCL_RTSCONN; 7925240Snordmark kmem_cache_free(rts_conn_cache, connp); 7930Sstevel@tonic-gate } else { 7945240Snordmark connp->conn_flags = IPCL_IPCCONN; 7955240Snordmark ASSERT(connp->conn_flags & IPCL_IPCCONN); 7965240Snordmark ASSERT(connp->conn_priv == NULL); 7975240Snordmark kmem_cache_free(ip_conn_cache, connp); 7980Sstevel@tonic-gate } 7990Sstevel@tonic-gate } 8000Sstevel@tonic-gate 8010Sstevel@tonic-gate /* 8020Sstevel@tonic-gate * Running in cluster mode - deregister listener information 8030Sstevel@tonic-gate */ 8040Sstevel@tonic-gate 8050Sstevel@tonic-gate static void 8060Sstevel@tonic-gate ipcl_conn_unlisten(conn_t *connp) 
8070Sstevel@tonic-gate { 8080Sstevel@tonic-gate ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0); 8090Sstevel@tonic-gate ASSERT(connp->conn_lport != 0); 8100Sstevel@tonic-gate 8110Sstevel@tonic-gate if (cl_inet_unlisten != NULL) { 8120Sstevel@tonic-gate sa_family_t addr_family; 8130Sstevel@tonic-gate uint8_t *laddrp; 8140Sstevel@tonic-gate 8150Sstevel@tonic-gate if (connp->conn_pkt_isv6) { 8160Sstevel@tonic-gate addr_family = AF_INET6; 8170Sstevel@tonic-gate laddrp = (uint8_t *)&connp->conn_bound_source_v6; 8180Sstevel@tonic-gate } else { 8190Sstevel@tonic-gate addr_family = AF_INET; 8200Sstevel@tonic-gate laddrp = (uint8_t *)&connp->conn_bound_source; 8210Sstevel@tonic-gate } 8228392SHuafeng.Lv@Sun.COM (*cl_inet_unlisten)(connp->conn_netstack->netstack_stackid, 8238392SHuafeng.Lv@Sun.COM IPPROTO_TCP, addr_family, laddrp, connp->conn_lport, NULL); 8240Sstevel@tonic-gate } 8250Sstevel@tonic-gate connp->conn_flags &= ~IPCL_CL_LISTENER; 8260Sstevel@tonic-gate } 8270Sstevel@tonic-gate 8280Sstevel@tonic-gate /* 8290Sstevel@tonic-gate * We set the IPCL_REMOVED flag (instead of clearing the flag indicating 8300Sstevel@tonic-gate * which table the conn belonged to). So for debugging we can see which hash 8310Sstevel@tonic-gate * table this connection was in. 
 */
/*
 * IPCL_HASH_REMOVE(connp): unlink connp from the fanout bucket recorded in
 * conn_fanout.  It does nothing when conn_fanout is NULL.
 *
 * conn_lock must not be held on entry.  conn_fanout is sampled before the
 * bucket lock is taken; the unlink, the IPCL_REMOVED marking, the cluster
 * unlisten callback (only for IPCL_CL_LISTENER conns) and the CONN_DEC_REF
 * that drops the hash list's reference all run under connf_lock.
 *
 * The macro expands to a bare { } block that declares a local named
 * "connfp".
 */
#define	IPCL_HASH_REMOVE(connp)	{					\
	connf_t	*connfp = (connp)->conn_fanout;				\
	ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));			\
	if (connfp != NULL) {						\
		IPCL_DEBUG_LVL(4, ("IPCL_HASH_REMOVE: connp %p",	\
		    (void *)(connp)));					\
		mutex_enter(&connfp->connf_lock);			\
		if ((connp)->conn_next != NULL)				\
			(connp)->conn_next->conn_prev =			\
			    (connp)->conn_prev;				\
		if ((connp)->conn_prev != NULL)				\
			(connp)->conn_prev->conn_next =			\
			    (connp)->conn_next;				\
		else							\
			connfp->connf_head = (connp)->conn_next;	\
		(connp)->conn_fanout = NULL;				\
		(connp)->conn_next = NULL;				\
		(connp)->conn_prev = NULL;				\
		(connp)->conn_flags |= IPCL_REMOVED;			\
		if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0)	\
			ipcl_conn_unlisten((connp));			\
		CONN_DEC_REF((connp));					\
		mutex_exit(&connfp->connf_lock);			\
	}								\
}

/*
 * Function wrapper around IPCL_HASH_REMOVE so that code outside this file
 * can remove a conn from whatever fanout it is currently on.
 */
void
ipcl_hash_remove(conn_t *connp)
{
	IPCL_HASH_REMOVE(connp);
}

/*
 * The whole purpose of this function is to allow removal of
 * a conn_t from the connected hash for timewait reclaim.
8680Sstevel@tonic-gate * This is essentially a TW reclaim fastpath where timewait 8690Sstevel@tonic-gate * collector checks under fanout lock (so no one else can 8700Sstevel@tonic-gate * get access to the conn_t) that refcnt is 2 i.e. one for 8710Sstevel@tonic-gate * TCP and one for the classifier hash list. If ref count 8720Sstevel@tonic-gate * is indeed 2, we can just remove the conn under lock and 8730Sstevel@tonic-gate * avoid cleaning up the conn under squeue. This gives us 8740Sstevel@tonic-gate * improved performance. 8750Sstevel@tonic-gate */ 8760Sstevel@tonic-gate void 8770Sstevel@tonic-gate ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp) 8780Sstevel@tonic-gate { 8790Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connfp->connf_lock)); 8800Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock)); 8810Sstevel@tonic-gate ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0); 8820Sstevel@tonic-gate 8830Sstevel@tonic-gate if ((connp)->conn_next != NULL) { 8844691Skcpoon (connp)->conn_next->conn_prev = (connp)->conn_prev; 8850Sstevel@tonic-gate } 8860Sstevel@tonic-gate if ((connp)->conn_prev != NULL) { 8874691Skcpoon (connp)->conn_prev->conn_next = (connp)->conn_next; 8880Sstevel@tonic-gate } else { 8890Sstevel@tonic-gate connfp->connf_head = (connp)->conn_next; 8900Sstevel@tonic-gate } 8910Sstevel@tonic-gate (connp)->conn_fanout = NULL; 8920Sstevel@tonic-gate (connp)->conn_next = NULL; 8930Sstevel@tonic-gate (connp)->conn_prev = NULL; 8940Sstevel@tonic-gate (connp)->conn_flags |= IPCL_REMOVED; 8950Sstevel@tonic-gate ASSERT((connp)->conn_ref == 2); 8960Sstevel@tonic-gate (connp)->conn_ref--; 8970Sstevel@tonic-gate } 8980Sstevel@tonic-gate 8990Sstevel@tonic-gate #define IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) { \ 9000Sstevel@tonic-gate ASSERT((connp)->conn_fanout == NULL); \ 9010Sstevel@tonic-gate ASSERT((connp)->conn_next == NULL); \ 9020Sstevel@tonic-gate ASSERT((connp)->conn_prev == NULL); \ 9030Sstevel@tonic-gate if ((connfp)->connf_head != NULL) { \ 
9040Sstevel@tonic-gate (connfp)->connf_head->conn_prev = (connp); \ 9050Sstevel@tonic-gate (connp)->conn_next = (connfp)->connf_head; \ 9060Sstevel@tonic-gate } \ 9070Sstevel@tonic-gate (connp)->conn_fanout = (connfp); \ 9080Sstevel@tonic-gate (connfp)->connf_head = (connp); \ 9090Sstevel@tonic-gate (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 9100Sstevel@tonic-gate IPCL_CONNECTED; \ 9110Sstevel@tonic-gate CONN_INC_REF(connp); \ 9120Sstevel@tonic-gate } 9130Sstevel@tonic-gate 9140Sstevel@tonic-gate #define IPCL_HASH_INSERT_CONNECTED(connfp, connp) { \ 9150Sstevel@tonic-gate IPCL_DEBUG_LVL(8, ("IPCL_HASH_INSERT_CONNECTED: connfp %p " \ 9160Sstevel@tonic-gate "connp %p", (void *)(connfp), (void *)(connp))); \ 9170Sstevel@tonic-gate IPCL_HASH_REMOVE((connp)); \ 9180Sstevel@tonic-gate mutex_enter(&(connfp)->connf_lock); \ 9190Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); \ 9200Sstevel@tonic-gate mutex_exit(&(connfp)->connf_lock); \ 9210Sstevel@tonic-gate } 9220Sstevel@tonic-gate 9230Sstevel@tonic-gate #define IPCL_HASH_INSERT_BOUND(connfp, connp) { \ 9240Sstevel@tonic-gate conn_t *pconnp = NULL, *nconnp; \ 9250Sstevel@tonic-gate IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_BOUND: connfp %p " \ 9260Sstevel@tonic-gate "connp %p", (void *)connfp, (void *)(connp))); \ 9270Sstevel@tonic-gate IPCL_HASH_REMOVE((connp)); \ 9280Sstevel@tonic-gate mutex_enter(&(connfp)->connf_lock); \ 9290Sstevel@tonic-gate nconnp = (connfp)->connf_head; \ 930153Sethindra while (nconnp != NULL && \ 931153Sethindra !_IPCL_V4_MATCH_ANY(nconnp->conn_srcv6)) { \ 932153Sethindra pconnp = nconnp; \ 933153Sethindra nconnp = nconnp->conn_next; \ 9340Sstevel@tonic-gate } \ 9350Sstevel@tonic-gate if (pconnp != NULL) { \ 9360Sstevel@tonic-gate pconnp->conn_next = (connp); \ 9370Sstevel@tonic-gate (connp)->conn_prev = pconnp; \ 9380Sstevel@tonic-gate } else { \ 9390Sstevel@tonic-gate (connfp)->connf_head = (connp); \ 9400Sstevel@tonic-gate } \ 9410Sstevel@tonic-gate if 
(nconnp != NULL) { \ 9420Sstevel@tonic-gate (connp)->conn_next = nconnp; \ 9430Sstevel@tonic-gate nconnp->conn_prev = (connp); \ 9440Sstevel@tonic-gate } \ 9450Sstevel@tonic-gate (connp)->conn_fanout = (connfp); \ 9460Sstevel@tonic-gate (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 9470Sstevel@tonic-gate IPCL_BOUND; \ 9480Sstevel@tonic-gate CONN_INC_REF(connp); \ 9490Sstevel@tonic-gate mutex_exit(&(connfp)->connf_lock); \ 9500Sstevel@tonic-gate } 9510Sstevel@tonic-gate 9520Sstevel@tonic-gate #define IPCL_HASH_INSERT_WILDCARD(connfp, connp) { \ 9530Sstevel@tonic-gate conn_t **list, *prev, *next; \ 9540Sstevel@tonic-gate boolean_t isv4mapped = \ 9550Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED(&(connp)->conn_srcv6); \ 9560Sstevel@tonic-gate IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_WILDCARD: connfp %p " \ 9570Sstevel@tonic-gate "connp %p", (void *)(connfp), (void *)(connp))); \ 9580Sstevel@tonic-gate IPCL_HASH_REMOVE((connp)); \ 9590Sstevel@tonic-gate mutex_enter(&(connfp)->connf_lock); \ 9600Sstevel@tonic-gate list = &(connfp)->connf_head; \ 9610Sstevel@tonic-gate prev = NULL; \ 9620Sstevel@tonic-gate while ((next = *list) != NULL) { \ 9630Sstevel@tonic-gate if (isv4mapped && \ 9640Sstevel@tonic-gate IN6_IS_ADDR_UNSPECIFIED(&next->conn_srcv6) && \ 9650Sstevel@tonic-gate connp->conn_zoneid == next->conn_zoneid) { \ 9660Sstevel@tonic-gate (connp)->conn_next = next; \ 9670Sstevel@tonic-gate if (prev != NULL) \ 9680Sstevel@tonic-gate prev = next->conn_prev; \ 9690Sstevel@tonic-gate next->conn_prev = (connp); \ 9700Sstevel@tonic-gate break; \ 9710Sstevel@tonic-gate } \ 9720Sstevel@tonic-gate list = &next->conn_next; \ 9730Sstevel@tonic-gate prev = next; \ 9740Sstevel@tonic-gate } \ 9750Sstevel@tonic-gate (connp)->conn_prev = prev; \ 9760Sstevel@tonic-gate *list = (connp); \ 9770Sstevel@tonic-gate (connp)->conn_fanout = (connfp); \ 9780Sstevel@tonic-gate (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 9790Sstevel@tonic-gate IPCL_BOUND; \ 
9800Sstevel@tonic-gate CONN_INC_REF((connp)); \ 9810Sstevel@tonic-gate mutex_exit(&(connfp)->connf_lock); \ 9820Sstevel@tonic-gate } 9830Sstevel@tonic-gate 9840Sstevel@tonic-gate void 9850Sstevel@tonic-gate ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp) 9860Sstevel@tonic-gate { 9870Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 9880Sstevel@tonic-gate } 9890Sstevel@tonic-gate 9900Sstevel@tonic-gate void 9910Sstevel@tonic-gate ipcl_proto_insert(conn_t *connp, uint8_t protocol) 9920Sstevel@tonic-gate { 9930Sstevel@tonic-gate connf_t *connfp; 9943448Sdh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 9950Sstevel@tonic-gate 9960Sstevel@tonic-gate ASSERT(connp != NULL); 9971676Sjpk ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH || 9981676Sjpk protocol == IPPROTO_ESP); 9990Sstevel@tonic-gate 10000Sstevel@tonic-gate connp->conn_ulp = protocol; 10010Sstevel@tonic-gate 10020Sstevel@tonic-gate /* Insert it in the protocol hash */ 10033448Sdh155122 connfp = &ipst->ips_ipcl_proto_fanout[protocol]; 10040Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 10050Sstevel@tonic-gate } 10060Sstevel@tonic-gate 10070Sstevel@tonic-gate void 10080Sstevel@tonic-gate ipcl_proto_insert_v6(conn_t *connp, uint8_t protocol) 10090Sstevel@tonic-gate { 10100Sstevel@tonic-gate connf_t *connfp; 10113448Sdh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 10120Sstevel@tonic-gate 10130Sstevel@tonic-gate ASSERT(connp != NULL); 10141676Sjpk ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH || 10151676Sjpk protocol == IPPROTO_ESP); 10160Sstevel@tonic-gate 10170Sstevel@tonic-gate connp->conn_ulp = protocol; 10180Sstevel@tonic-gate 10190Sstevel@tonic-gate /* Insert it in the Bind Hash */ 10203448Sdh155122 connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; 10210Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 10220Sstevel@tonic-gate } 10230Sstevel@tonic-gate 10240Sstevel@tonic-gate /* 10250Sstevel@tonic-gate * This 
function is used only for inserting SCTP raw socket now. 10260Sstevel@tonic-gate * This may change later. 10270Sstevel@tonic-gate * 10280Sstevel@tonic-gate * Note that only one raw socket can be bound to a port. The param 10290Sstevel@tonic-gate * lport is in network byte order. 10300Sstevel@tonic-gate */ 10310Sstevel@tonic-gate static int 10320Sstevel@tonic-gate ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport) 10330Sstevel@tonic-gate { 10340Sstevel@tonic-gate connf_t *connfp; 10350Sstevel@tonic-gate conn_t *oconnp; 10363448Sdh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 10370Sstevel@tonic-gate 10383448Sdh155122 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)]; 10390Sstevel@tonic-gate 10400Sstevel@tonic-gate /* Check for existing raw socket already bound to the port. */ 10410Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 10420Sstevel@tonic-gate for (oconnp = connfp->connf_head; oconnp != NULL; 1043409Skcpoon oconnp = oconnp->conn_next) { 10440Sstevel@tonic-gate if (oconnp->conn_lport == lport && 10450Sstevel@tonic-gate oconnp->conn_zoneid == connp->conn_zoneid && 10460Sstevel@tonic-gate oconnp->conn_af_isv6 == connp->conn_af_isv6 && 10470Sstevel@tonic-gate ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) || 10480Sstevel@tonic-gate IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_srcv6) || 10490Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6) || 10500Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_srcv6)) || 10510Sstevel@tonic-gate IN6_ARE_ADDR_EQUAL(&oconnp->conn_srcv6, 10520Sstevel@tonic-gate &connp->conn_srcv6))) { 10530Sstevel@tonic-gate break; 10540Sstevel@tonic-gate } 10550Sstevel@tonic-gate } 10560Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 10570Sstevel@tonic-gate if (oconnp != NULL) 10580Sstevel@tonic-gate return (EADDRNOTAVAIL); 10590Sstevel@tonic-gate 10600Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) || 10610Sstevel@tonic-gate 
IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_remv6)) { 10620Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) || 10630Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6)) { 10640Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 10650Sstevel@tonic-gate } else { 10660Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 10670Sstevel@tonic-gate } 10680Sstevel@tonic-gate } else { 10690Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 10700Sstevel@tonic-gate } 10710Sstevel@tonic-gate return (0); 10720Sstevel@tonic-gate } 10730Sstevel@tonic-gate 10740Sstevel@tonic-gate /* 10751676Sjpk * Check for a MAC exemption conflict on a labeled system. Note that for 10761676Sjpk * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the 10771676Sjpk * transport layer. This check is for binding all other protocols. 10781676Sjpk * 10791676Sjpk * Returns true if there's a conflict. 10801676Sjpk */ 10811676Sjpk static boolean_t 10823448Sdh155122 check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst) 10831676Sjpk { 10841676Sjpk connf_t *connfp; 10851676Sjpk conn_t *tconn; 10861676Sjpk 10873448Sdh155122 connfp = &ipst->ips_ipcl_proto_fanout[connp->conn_ulp]; 10881676Sjpk mutex_enter(&connfp->connf_lock); 10891676Sjpk for (tconn = connfp->connf_head; tconn != NULL; 10901676Sjpk tconn = tconn->conn_next) { 10911676Sjpk /* We don't allow v4 fallback for v6 raw socket */ 10921676Sjpk if (connp->conn_af_isv6 != tconn->conn_af_isv6) 10931676Sjpk continue; 10941676Sjpk /* If neither is exempt, then there's no conflict */ 10951676Sjpk if (!connp->conn_mac_exempt && !tconn->conn_mac_exempt) 10961676Sjpk continue; 10971676Sjpk /* If both are bound to different specific addrs, ok */ 10981676Sjpk if (connp->conn_src != INADDR_ANY && 10991676Sjpk tconn->conn_src != INADDR_ANY && 11001676Sjpk connp->conn_src != tconn->conn_src) 11011676Sjpk continue; 11021676Sjpk /* These two conflict; fail */ 11031676Sjpk break; 11041676Sjpk } 
11051676Sjpk mutex_exit(&connfp->connf_lock); 11061676Sjpk return (tconn != NULL); 11071676Sjpk } 11081676Sjpk 11091676Sjpk static boolean_t 11103448Sdh155122 check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst) 11111676Sjpk { 11121676Sjpk connf_t *connfp; 11131676Sjpk conn_t *tconn; 11141676Sjpk 11153448Sdh155122 connfp = &ipst->ips_ipcl_proto_fanout[connp->conn_ulp]; 11161676Sjpk mutex_enter(&connfp->connf_lock); 11171676Sjpk for (tconn = connfp->connf_head; tconn != NULL; 11181676Sjpk tconn = tconn->conn_next) { 11191676Sjpk /* We don't allow v4 fallback for v6 raw socket */ 11201676Sjpk if (connp->conn_af_isv6 != tconn->conn_af_isv6) 11211676Sjpk continue; 11221676Sjpk /* If neither is exempt, then there's no conflict */ 11231676Sjpk if (!connp->conn_mac_exempt && !tconn->conn_mac_exempt) 11241676Sjpk continue; 11251676Sjpk /* If both are bound to different addrs, ok */ 11261676Sjpk if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) && 11271676Sjpk !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_srcv6) && 11281676Sjpk !IN6_ARE_ADDR_EQUAL(&connp->conn_srcv6, &tconn->conn_srcv6)) 11291676Sjpk continue; 11301676Sjpk /* These two conflict; fail */ 11311676Sjpk break; 11321676Sjpk } 11331676Sjpk mutex_exit(&connfp->connf_lock); 11341676Sjpk return (tconn != NULL); 11351676Sjpk } 11361676Sjpk 11371676Sjpk /* 11380Sstevel@tonic-gate * (v4, v6) bind hash insertion routines 11390Sstevel@tonic-gate */ 11400Sstevel@tonic-gate int 11410Sstevel@tonic-gate ipcl_bind_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, uint16_t lport) 11420Sstevel@tonic-gate { 11430Sstevel@tonic-gate connf_t *connfp; 11440Sstevel@tonic-gate #ifdef IPCL_DEBUG 11450Sstevel@tonic-gate char buf[INET_NTOA_BUFSIZE]; 11460Sstevel@tonic-gate #endif 11470Sstevel@tonic-gate int ret = 0; 11483448Sdh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 11490Sstevel@tonic-gate 11500Sstevel@tonic-gate ASSERT(connp); 11510Sstevel@tonic-gate 11520Sstevel@tonic-gate IPCL_DEBUG_LVL(64, ("ipcl_bind_insert: 
connp %p, src = %s, " 11530Sstevel@tonic-gate "port = %d\n", (void *)connp, inet_ntoa_r(src, buf), lport)); 11540Sstevel@tonic-gate 11550Sstevel@tonic-gate connp->conn_ulp = protocol; 11560Sstevel@tonic-gate IN6_IPADDR_TO_V4MAPPED(src, &connp->conn_srcv6); 11570Sstevel@tonic-gate connp->conn_lport = lport; 11580Sstevel@tonic-gate 11590Sstevel@tonic-gate switch (protocol) { 11601676Sjpk default: 11613448Sdh155122 if (is_system_labeled() && 11623448Sdh155122 check_exempt_conflict_v4(connp, ipst)) 11631676Sjpk return (EADDRINUSE); 11641676Sjpk /* FALLTHROUGH */ 11650Sstevel@tonic-gate case IPPROTO_UDP: 11660Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 11670Sstevel@tonic-gate IPCL_DEBUG_LVL(64, 11680Sstevel@tonic-gate ("ipcl_bind_insert: connp %p - udp\n", 11690Sstevel@tonic-gate (void *)connp)); 11703448Sdh155122 connfp = &ipst->ips_ipcl_udp_fanout[ 11713448Sdh155122 IPCL_UDP_HASH(lport, ipst)]; 11720Sstevel@tonic-gate } else { 11730Sstevel@tonic-gate IPCL_DEBUG_LVL(64, 11740Sstevel@tonic-gate ("ipcl_bind_insert: connp %p - protocol\n", 11750Sstevel@tonic-gate (void *)connp)); 11763448Sdh155122 connfp = &ipst->ips_ipcl_proto_fanout[protocol]; 11770Sstevel@tonic-gate } 11780Sstevel@tonic-gate 11790Sstevel@tonic-gate if (connp->conn_rem != INADDR_ANY) { 11800Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 11810Sstevel@tonic-gate } else if (connp->conn_src != INADDR_ANY) { 11820Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 11830Sstevel@tonic-gate } else { 11840Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 11850Sstevel@tonic-gate } 11860Sstevel@tonic-gate break; 11870Sstevel@tonic-gate 11880Sstevel@tonic-gate case IPPROTO_TCP: 11890Sstevel@tonic-gate 11900Sstevel@tonic-gate /* Insert it in the Bind Hash */ 11911676Sjpk ASSERT(connp->conn_zoneid != ALL_ZONES); 11923448Sdh155122 connfp = &ipst->ips_ipcl_bind_fanout[ 11933448Sdh155122 IPCL_BIND_HASH(lport, ipst)]; 11940Sstevel@tonic-gate if (connp->conn_src != INADDR_ANY) { 
11950Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 11960Sstevel@tonic-gate } else { 11970Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 11980Sstevel@tonic-gate } 11990Sstevel@tonic-gate if (cl_inet_listen != NULL) { 12000Sstevel@tonic-gate ASSERT(!connp->conn_pkt_isv6); 12010Sstevel@tonic-gate connp->conn_flags |= IPCL_CL_LISTENER; 12028392SHuafeng.Lv@Sun.COM (*cl_inet_listen)( 12038392SHuafeng.Lv@Sun.COM connp->conn_netstack->netstack_stackid, 12048392SHuafeng.Lv@Sun.COM IPPROTO_TCP, AF_INET, 12058392SHuafeng.Lv@Sun.COM (uint8_t *)&connp->conn_bound_source, lport, NULL); 12060Sstevel@tonic-gate } 12070Sstevel@tonic-gate break; 12080Sstevel@tonic-gate 12090Sstevel@tonic-gate case IPPROTO_SCTP: 12100Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 12110Sstevel@tonic-gate break; 12120Sstevel@tonic-gate } 12130Sstevel@tonic-gate 12140Sstevel@tonic-gate return (ret); 12150Sstevel@tonic-gate } 12160Sstevel@tonic-gate 12170Sstevel@tonic-gate int 12180Sstevel@tonic-gate ipcl_bind_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, 12190Sstevel@tonic-gate uint16_t lport) 12200Sstevel@tonic-gate { 12210Sstevel@tonic-gate connf_t *connfp; 12220Sstevel@tonic-gate int ret = 0; 12233448Sdh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 12240Sstevel@tonic-gate 12250Sstevel@tonic-gate ASSERT(connp); 12260Sstevel@tonic-gate 12270Sstevel@tonic-gate connp->conn_ulp = protocol; 12280Sstevel@tonic-gate connp->conn_srcv6 = *src; 12290Sstevel@tonic-gate connp->conn_lport = lport; 12300Sstevel@tonic-gate 12310Sstevel@tonic-gate switch (protocol) { 12321676Sjpk default: 12333448Sdh155122 if (is_system_labeled() && 12343448Sdh155122 check_exempt_conflict_v6(connp, ipst)) 12351676Sjpk return (EADDRINUSE); 12361676Sjpk /* FALLTHROUGH */ 12370Sstevel@tonic-gate case IPPROTO_UDP: 12380Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 12390Sstevel@tonic-gate IPCL_DEBUG_LVL(128, 12400Sstevel@tonic-gate ("ipcl_bind_insert_v6: connp 
%p - udp\n", 12410Sstevel@tonic-gate (void *)connp)); 12423448Sdh155122 connfp = &ipst->ips_ipcl_udp_fanout[ 12433448Sdh155122 IPCL_UDP_HASH(lport, ipst)]; 12440Sstevel@tonic-gate } else { 12450Sstevel@tonic-gate IPCL_DEBUG_LVL(128, 12460Sstevel@tonic-gate ("ipcl_bind_insert_v6: connp %p - protocol\n", 12470Sstevel@tonic-gate (void *)connp)); 12483448Sdh155122 connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; 12490Sstevel@tonic-gate } 12500Sstevel@tonic-gate 12510Sstevel@tonic-gate if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) { 12520Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 12530Sstevel@tonic-gate } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 12540Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 12550Sstevel@tonic-gate } else { 12560Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 12570Sstevel@tonic-gate } 12580Sstevel@tonic-gate break; 12590Sstevel@tonic-gate 12600Sstevel@tonic-gate case IPPROTO_TCP: 12610Sstevel@tonic-gate /* XXX - Need a separate table for IN6_IS_ADDR_UNSPECIFIED? 
*/ 12620Sstevel@tonic-gate 12630Sstevel@tonic-gate /* Insert it in the Bind Hash */ 12641676Sjpk ASSERT(connp->conn_zoneid != ALL_ZONES); 12653448Sdh155122 connfp = &ipst->ips_ipcl_bind_fanout[ 12663448Sdh155122 IPCL_BIND_HASH(lport, ipst)]; 12670Sstevel@tonic-gate if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 12680Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 12690Sstevel@tonic-gate } else { 12700Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 12710Sstevel@tonic-gate } 12720Sstevel@tonic-gate if (cl_inet_listen != NULL) { 12730Sstevel@tonic-gate sa_family_t addr_family; 12740Sstevel@tonic-gate uint8_t *laddrp; 12750Sstevel@tonic-gate 12760Sstevel@tonic-gate if (connp->conn_pkt_isv6) { 12770Sstevel@tonic-gate addr_family = AF_INET6; 12780Sstevel@tonic-gate laddrp = 12790Sstevel@tonic-gate (uint8_t *)&connp->conn_bound_source_v6; 12800Sstevel@tonic-gate } else { 12810Sstevel@tonic-gate addr_family = AF_INET; 12820Sstevel@tonic-gate laddrp = (uint8_t *)&connp->conn_bound_source; 12830Sstevel@tonic-gate } 12840Sstevel@tonic-gate connp->conn_flags |= IPCL_CL_LISTENER; 12858392SHuafeng.Lv@Sun.COM (*cl_inet_listen)( 12868392SHuafeng.Lv@Sun.COM connp->conn_netstack->netstack_stackid, 12878392SHuafeng.Lv@Sun.COM IPPROTO_TCP, addr_family, laddrp, lport, NULL); 12880Sstevel@tonic-gate } 12890Sstevel@tonic-gate break; 12900Sstevel@tonic-gate 12910Sstevel@tonic-gate case IPPROTO_SCTP: 12920Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 12930Sstevel@tonic-gate break; 12940Sstevel@tonic-gate } 12950Sstevel@tonic-gate 12960Sstevel@tonic-gate return (ret); 12970Sstevel@tonic-gate } 12980Sstevel@tonic-gate 12990Sstevel@tonic-gate /* 13000Sstevel@tonic-gate * ipcl_conn_hash insertion routines. 
13010Sstevel@tonic-gate */ 13020Sstevel@tonic-gate int 13030Sstevel@tonic-gate ipcl_conn_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, 13040Sstevel@tonic-gate ipaddr_t rem, uint32_t ports) 13050Sstevel@tonic-gate { 13060Sstevel@tonic-gate connf_t *connfp; 13070Sstevel@tonic-gate uint16_t *up; 13080Sstevel@tonic-gate conn_t *tconnp; 13090Sstevel@tonic-gate #ifdef IPCL_DEBUG 13100Sstevel@tonic-gate char sbuf[INET_NTOA_BUFSIZE], rbuf[INET_NTOA_BUFSIZE]; 13110Sstevel@tonic-gate #endif 13120Sstevel@tonic-gate in_port_t lport; 13130Sstevel@tonic-gate int ret = 0; 13143448Sdh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 13150Sstevel@tonic-gate 13160Sstevel@tonic-gate IPCL_DEBUG_LVL(256, ("ipcl_conn_insert: connp %p, src = %s, " 13170Sstevel@tonic-gate "dst = %s, ports = %x, protocol = %x", (void *)connp, 13180Sstevel@tonic-gate inet_ntoa_r(src, sbuf), inet_ntoa_r(rem, rbuf), 13190Sstevel@tonic-gate ports, protocol)); 13200Sstevel@tonic-gate 13210Sstevel@tonic-gate switch (protocol) { 13220Sstevel@tonic-gate case IPPROTO_TCP: 13230Sstevel@tonic-gate if (!(connp->conn_flags & IPCL_EAGER)) { 13240Sstevel@tonic-gate /* 13250Sstevel@tonic-gate * for a eager connection, i.e connections which 13260Sstevel@tonic-gate * have just been created, the initialization is 13270Sstevel@tonic-gate * already done in ip at conn_creation time, so 13280Sstevel@tonic-gate * we can skip the checks here. 13290Sstevel@tonic-gate */ 13300Sstevel@tonic-gate IPCL_CONN_INIT(connp, protocol, src, rem, ports); 13310Sstevel@tonic-gate } 13328432SJonathan.Anderson@Sun.COM 13338432SJonathan.Anderson@Sun.COM /* 13348432SJonathan.Anderson@Sun.COM * For tcp, we check whether the connection tuple already 13358432SJonathan.Anderson@Sun.COM * exists before allowing the connection to proceed. We 13368432SJonathan.Anderson@Sun.COM * also allow indexing on the zoneid. 
This is to allow 13378432SJonathan.Anderson@Sun.COM * multiple shared stack zones to have the same tcp 13388432SJonathan.Anderson@Sun.COM * connection tuple. In practice this only happens for 13398432SJonathan.Anderson@Sun.COM * INADDR_LOOPBACK as it's the only local address which 13408432SJonathan.Anderson@Sun.COM * doesn't have to be unique. 13418432SJonathan.Anderson@Sun.COM */ 13423448Sdh155122 connfp = &ipst->ips_ipcl_conn_fanout[ 13433448Sdh155122 IPCL_CONN_HASH(connp->conn_rem, 13443448Sdh155122 connp->conn_ports, ipst)]; 13450Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 13460Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 13470Sstevel@tonic-gate tconnp = tconnp->conn_next) { 13488432SJonathan.Anderson@Sun.COM if ((IPCL_CONN_MATCH(tconnp, connp->conn_ulp, 13490Sstevel@tonic-gate connp->conn_rem, connp->conn_src, 13508432SJonathan.Anderson@Sun.COM connp->conn_ports)) && 13518432SJonathan.Anderson@Sun.COM (IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid))) { 13520Sstevel@tonic-gate 13530Sstevel@tonic-gate /* Already have a conn. bail out */ 13540Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 13550Sstevel@tonic-gate return (EADDRINUSE); 13560Sstevel@tonic-gate } 13570Sstevel@tonic-gate } 13580Sstevel@tonic-gate if (connp->conn_fanout != NULL) { 13590Sstevel@tonic-gate /* 13600Sstevel@tonic-gate * Probably a XTI/TLI application trying to do a 13610Sstevel@tonic-gate * rebind. Let it happen. 
13620Sstevel@tonic-gate */ 13630Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 13640Sstevel@tonic-gate IPCL_HASH_REMOVE(connp); 13650Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 13660Sstevel@tonic-gate } 13673104Sjprakash 13683104Sjprakash ASSERT(connp->conn_recv != NULL); 13693104Sjprakash 13700Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 13710Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 13720Sstevel@tonic-gate break; 13730Sstevel@tonic-gate 13740Sstevel@tonic-gate case IPPROTO_SCTP: 1375409Skcpoon /* 1376409Skcpoon * The raw socket may have already been bound, remove it 1377409Skcpoon * from the hash first. 1378409Skcpoon */ 1379409Skcpoon IPCL_HASH_REMOVE(connp); 1380409Skcpoon lport = htons((uint16_t)(ntohl(ports) & 0xFFFF)); 13810Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 13820Sstevel@tonic-gate break; 13830Sstevel@tonic-gate 13841676Sjpk default: 13851676Sjpk /* 13861676Sjpk * Check for conflicts among MAC exempt bindings. For 13871676Sjpk * transports with port numbers, this is done by the upper 13881676Sjpk * level per-transport binding logic. For all others, it's 13891676Sjpk * done here. 
13901676Sjpk */ 13913448Sdh155122 if (is_system_labeled() && 13923448Sdh155122 check_exempt_conflict_v4(connp, ipst)) 13931676Sjpk return (EADDRINUSE); 13941676Sjpk /* FALLTHROUGH */ 13951676Sjpk 13960Sstevel@tonic-gate case IPPROTO_UDP: 13970Sstevel@tonic-gate up = (uint16_t *)&ports; 13980Sstevel@tonic-gate IPCL_CONN_INIT(connp, protocol, src, rem, ports); 13990Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 14003448Sdh155122 connfp = &ipst->ips_ipcl_udp_fanout[ 14013448Sdh155122 IPCL_UDP_HASH(up[1], ipst)]; 14020Sstevel@tonic-gate } else { 14033448Sdh155122 connfp = &ipst->ips_ipcl_proto_fanout[protocol]; 14040Sstevel@tonic-gate } 14050Sstevel@tonic-gate 14060Sstevel@tonic-gate if (connp->conn_rem != INADDR_ANY) { 14070Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 14080Sstevel@tonic-gate } else if (connp->conn_src != INADDR_ANY) { 14090Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 14100Sstevel@tonic-gate } else { 14110Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 14120Sstevel@tonic-gate } 14130Sstevel@tonic-gate break; 14140Sstevel@tonic-gate } 14150Sstevel@tonic-gate 14160Sstevel@tonic-gate return (ret); 14170Sstevel@tonic-gate } 14180Sstevel@tonic-gate 14190Sstevel@tonic-gate int 14200Sstevel@tonic-gate ipcl_conn_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, 14210Sstevel@tonic-gate const in6_addr_t *rem, uint32_t ports, uint_t ifindex) 14220Sstevel@tonic-gate { 14230Sstevel@tonic-gate connf_t *connfp; 14240Sstevel@tonic-gate uint16_t *up; 14250Sstevel@tonic-gate conn_t *tconnp; 14260Sstevel@tonic-gate in_port_t lport; 14270Sstevel@tonic-gate int ret = 0; 14283448Sdh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 14290Sstevel@tonic-gate 14300Sstevel@tonic-gate switch (protocol) { 14310Sstevel@tonic-gate case IPPROTO_TCP: 14320Sstevel@tonic-gate /* Just need to insert a conn struct */ 14330Sstevel@tonic-gate if (!(connp->conn_flags & IPCL_EAGER)) { 14340Sstevel@tonic-gate 
IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports); 14350Sstevel@tonic-gate } 14368432SJonathan.Anderson@Sun.COM 14378432SJonathan.Anderson@Sun.COM /* 14388432SJonathan.Anderson@Sun.COM * For tcp, we check whether the connection tuple already 14398432SJonathan.Anderson@Sun.COM * exists before allowing the connection to proceed. We 14408432SJonathan.Anderson@Sun.COM * also allow indexing on the zoneid. This is to allow 14418432SJonathan.Anderson@Sun.COM * multiple shared stack zones to have the same tcp 14428432SJonathan.Anderson@Sun.COM * connection tuple. In practice this only happens for 14438432SJonathan.Anderson@Sun.COM * ipv6_loopback as it's the only local address which 14448432SJonathan.Anderson@Sun.COM * doesn't have to be unique. 14458432SJonathan.Anderson@Sun.COM */ 14463448Sdh155122 connfp = &ipst->ips_ipcl_conn_fanout[ 14473448Sdh155122 IPCL_CONN_HASH_V6(connp->conn_remv6, connp->conn_ports, 14483448Sdh155122 ipst)]; 14490Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 14500Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 14510Sstevel@tonic-gate tconnp = tconnp->conn_next) { 14520Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_ulp, 14530Sstevel@tonic-gate connp->conn_remv6, connp->conn_srcv6, 14540Sstevel@tonic-gate connp->conn_ports) && 14550Sstevel@tonic-gate (tconnp->conn_tcp->tcp_bound_if == 0 || 14568432SJonathan.Anderson@Sun.COM tconnp->conn_tcp->tcp_bound_if == ifindex) && 14578432SJonathan.Anderson@Sun.COM (IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid))) { 14580Sstevel@tonic-gate /* Already have a conn. bail out */ 14590Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 14600Sstevel@tonic-gate return (EADDRINUSE); 14610Sstevel@tonic-gate } 14620Sstevel@tonic-gate } 14630Sstevel@tonic-gate if (connp->conn_fanout != NULL) { 14640Sstevel@tonic-gate /* 14650Sstevel@tonic-gate * Probably a XTI/TLI application trying to do a 14660Sstevel@tonic-gate * rebind. Let it happen. 
14670Sstevel@tonic-gate */ 14680Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 14690Sstevel@tonic-gate IPCL_HASH_REMOVE(connp); 14700Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 14710Sstevel@tonic-gate } 14720Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 14730Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 14740Sstevel@tonic-gate break; 14750Sstevel@tonic-gate 14760Sstevel@tonic-gate case IPPROTO_SCTP: 1477409Skcpoon IPCL_HASH_REMOVE(connp); 1478409Skcpoon lport = htons((uint16_t)(ntohl(ports) & 0xFFFF)); 14790Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 14800Sstevel@tonic-gate break; 14810Sstevel@tonic-gate 14821676Sjpk default: 14833448Sdh155122 if (is_system_labeled() && 14843448Sdh155122 check_exempt_conflict_v6(connp, ipst)) 14851676Sjpk return (EADDRINUSE); 14861676Sjpk /* FALLTHROUGH */ 14870Sstevel@tonic-gate case IPPROTO_UDP: 14880Sstevel@tonic-gate up = (uint16_t *)&ports; 14890Sstevel@tonic-gate IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports); 14900Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 14913448Sdh155122 connfp = &ipst->ips_ipcl_udp_fanout[ 14923448Sdh155122 IPCL_UDP_HASH(up[1], ipst)]; 14930Sstevel@tonic-gate } else { 14943448Sdh155122 connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; 14950Sstevel@tonic-gate } 14960Sstevel@tonic-gate 14970Sstevel@tonic-gate if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) { 14980Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 14990Sstevel@tonic-gate } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 15000Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 15010Sstevel@tonic-gate } else { 15020Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 15030Sstevel@tonic-gate } 15040Sstevel@tonic-gate break; 15050Sstevel@tonic-gate } 15060Sstevel@tonic-gate 15070Sstevel@tonic-gate return (ret); 15080Sstevel@tonic-gate } 15090Sstevel@tonic-gate 15100Sstevel@tonic-gate /* 15110Sstevel@tonic-gate * v4 packet classifying 
function. looks up the fanout table to 15120Sstevel@tonic-gate * find the conn, the packet belongs to. returns the conn with 15130Sstevel@tonic-gate * the reference held, null otherwise. 15141676Sjpk * 15151676Sjpk * If zoneid is ALL_ZONES, then the search rules described in the "Connection 15161676Sjpk * Lookup" comment block are applied. Labels are also checked as described 15171676Sjpk * above. If the packet is from the inside (looped back), and is from the same 15181676Sjpk * zone, then label checks are omitted. 15190Sstevel@tonic-gate */ 15200Sstevel@tonic-gate conn_t * 15213448Sdh155122 ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid, 15223448Sdh155122 ip_stack_t *ipst) 15230Sstevel@tonic-gate { 15240Sstevel@tonic-gate ipha_t *ipha; 15250Sstevel@tonic-gate connf_t *connfp, *bind_connfp; 15260Sstevel@tonic-gate uint16_t lport; 15270Sstevel@tonic-gate uint16_t fport; 15280Sstevel@tonic-gate uint32_t ports; 15290Sstevel@tonic-gate conn_t *connp; 15300Sstevel@tonic-gate uint16_t *up; 15311676Sjpk boolean_t shared_addr; 15321676Sjpk boolean_t unlabeled; 15330Sstevel@tonic-gate 15340Sstevel@tonic-gate ipha = (ipha_t *)mp->b_rptr; 15350Sstevel@tonic-gate up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET); 15360Sstevel@tonic-gate 15370Sstevel@tonic-gate switch (protocol) { 15380Sstevel@tonic-gate case IPPROTO_TCP: 15390Sstevel@tonic-gate ports = *(uint32_t *)up; 15400Sstevel@tonic-gate connfp = 15413448Sdh155122 &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src, 15423448Sdh155122 ports, ipst)]; 15430Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 15440Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 15450Sstevel@tonic-gate connp = connp->conn_next) { 15468432SJonathan.Anderson@Sun.COM if ((IPCL_CONN_MATCH(connp, protocol, 15478432SJonathan.Anderson@Sun.COM ipha->ipha_src, ipha->ipha_dst, ports)) && 15488432SJonathan.Anderson@Sun.COM (IPCL_ZONE_MATCH(connp, zoneid))) { 15490Sstevel@tonic-gate 
break; 15508432SJonathan.Anderson@Sun.COM } 15510Sstevel@tonic-gate } 15520Sstevel@tonic-gate 15530Sstevel@tonic-gate if (connp != NULL) { 15541676Sjpk /* 15551676Sjpk * We have a fully-bound TCP connection. 15561676Sjpk * 15571676Sjpk * For labeled systems, there's no need to check the 15581676Sjpk * label here. It's known to be good as we checked 15591676Sjpk * before allowing the connection to become bound. 15601676Sjpk */ 15610Sstevel@tonic-gate CONN_INC_REF(connp); 15620Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 15630Sstevel@tonic-gate return (connp); 15640Sstevel@tonic-gate } 15650Sstevel@tonic-gate 15660Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 15670Sstevel@tonic-gate 15680Sstevel@tonic-gate lport = up[1]; 15691676Sjpk unlabeled = B_FALSE; 15701676Sjpk /* Cred cannot be null on IPv4 */ 15711676Sjpk if (is_system_labeled()) 15721676Sjpk unlabeled = (crgetlabel(DB_CRED(mp))->tsl_flags & 15731676Sjpk TSLF_UNLABELED) != 0; 15741676Sjpk shared_addr = (zoneid == ALL_ZONES); 15751676Sjpk if (shared_addr) { 15763448Sdh155122 /* 15773448Sdh155122 * No need to handle exclusive-stack zones since 15783448Sdh155122 * ALL_ZONES only applies to the shared stack. 15793448Sdh155122 */ 15801676Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 15811676Sjpk /* 15821676Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 15831676Sjpk * ALL_ZONES. In that case, we assume it's SLP, and 15841676Sjpk * search for the zone based on the packet label. 15851676Sjpk * 15861676Sjpk * If there is such a zone, we prefer to find a 15871676Sjpk * connection in it. Otherwise, we look for a 15881676Sjpk * MAC-exempt connection in any zone whose label 15891676Sjpk * dominates the default label on the packet. 
15901676Sjpk */ 15911676Sjpk if (zoneid == ALL_ZONES) 15921676Sjpk zoneid = tsol_packet_to_zoneid(mp); 15931676Sjpk else 15941676Sjpk unlabeled = B_FALSE; 15951676Sjpk } 15961676Sjpk 15973448Sdh155122 bind_connfp = 15983448Sdh155122 &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 15990Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 16000Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 16010Sstevel@tonic-gate connp = connp->conn_next) { 16021676Sjpk if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst, 16032263Ssommerfe lport) && (IPCL_ZONE_MATCH(connp, zoneid) || 16041676Sjpk (unlabeled && connp->conn_mac_exempt))) 16050Sstevel@tonic-gate break; 16060Sstevel@tonic-gate } 16070Sstevel@tonic-gate 16081676Sjpk /* 16091676Sjpk * If the matching connection is SLP on a private address, then 16101676Sjpk * the label on the packet must match the local zone's label. 16111676Sjpk * Otherwise, it must be in the label range defined by tnrh. 16121676Sjpk * This is ensured by tsol_receive_label. 
16131676Sjpk */ 16141676Sjpk if (connp != NULL && is_system_labeled() && 16151676Sjpk !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION, 16161676Sjpk shared_addr, connp)) { 16171676Sjpk DTRACE_PROBE3( 16181676Sjpk tx__ip__log__info__classify__tcp, 16191676Sjpk char *, 16201676Sjpk "connp(1) could not receive mp(2)", 16211676Sjpk conn_t *, connp, mblk_t *, mp); 16221676Sjpk connp = NULL; 16231676Sjpk } 16241676Sjpk 16250Sstevel@tonic-gate if (connp != NULL) { 16261676Sjpk /* Have a listener at least */ 16270Sstevel@tonic-gate CONN_INC_REF(connp); 16280Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 16290Sstevel@tonic-gate return (connp); 16300Sstevel@tonic-gate } 16310Sstevel@tonic-gate 16320Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 16330Sstevel@tonic-gate 16340Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 16350Sstevel@tonic-gate ("ipcl_classify: couldn't classify mp = %p\n", 16360Sstevel@tonic-gate (void *)mp)); 16370Sstevel@tonic-gate break; 16380Sstevel@tonic-gate 16390Sstevel@tonic-gate case IPPROTO_UDP: 16400Sstevel@tonic-gate lport = up[1]; 16411676Sjpk unlabeled = B_FALSE; 16421676Sjpk /* Cred cannot be null on IPv4 */ 16431676Sjpk if (is_system_labeled()) 16441676Sjpk unlabeled = (crgetlabel(DB_CRED(mp))->tsl_flags & 16451676Sjpk TSLF_UNLABELED) != 0; 16461676Sjpk shared_addr = (zoneid == ALL_ZONES); 16471676Sjpk if (shared_addr) { 16483448Sdh155122 /* 16493448Sdh155122 * No need to handle exclusive-stack zones since 16503448Sdh155122 * ALL_ZONES only applies to the shared stack. 16513448Sdh155122 */ 16521676Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 16531676Sjpk /* 16541676Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 16551676Sjpk * ALL_ZONES. In that case, we assume it's SLP, and 16561676Sjpk * search for the zone based on the packet label. 16571676Sjpk * 16581676Sjpk * If there is such a zone, we prefer to find a 16591676Sjpk * connection in it. 
Otherwise, we look for a 16601676Sjpk * MAC-exempt connection in any zone whose label 16611676Sjpk * dominates the default label on the packet. 16621676Sjpk */ 16631676Sjpk if (zoneid == ALL_ZONES) 16641676Sjpk zoneid = tsol_packet_to_zoneid(mp); 16651676Sjpk else 16661676Sjpk unlabeled = B_FALSE; 16671676Sjpk } 16680Sstevel@tonic-gate fport = up[0]; 16690Sstevel@tonic-gate IPCL_DEBUG_LVL(512, ("ipcl_udp_classify %x %x", lport, fport)); 16703448Sdh155122 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; 16710Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 16720Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 16730Sstevel@tonic-gate connp = connp->conn_next) { 16740Sstevel@tonic-gate if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst, 16750Sstevel@tonic-gate fport, ipha->ipha_src) && 16762263Ssommerfe (IPCL_ZONE_MATCH(connp, zoneid) || 16771676Sjpk (unlabeled && connp->conn_mac_exempt))) 16780Sstevel@tonic-gate break; 16790Sstevel@tonic-gate } 16800Sstevel@tonic-gate 16811676Sjpk if (connp != NULL && is_system_labeled() && 16821676Sjpk !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION, 16831676Sjpk shared_addr, connp)) { 16841676Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__udp, 16851676Sjpk char *, "connp(1) could not receive mp(2)", 16861676Sjpk conn_t *, connp, mblk_t *, mp); 16871676Sjpk connp = NULL; 16881676Sjpk } 16891676Sjpk 16900Sstevel@tonic-gate if (connp != NULL) { 16910Sstevel@tonic-gate CONN_INC_REF(connp); 16920Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 16930Sstevel@tonic-gate return (connp); 16940Sstevel@tonic-gate } 16950Sstevel@tonic-gate 16960Sstevel@tonic-gate /* 16970Sstevel@tonic-gate * We shouldn't come here for multicast/broadcast packets 16980Sstevel@tonic-gate */ 16990Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 17000Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 17010Sstevel@tonic-gate ("ipcl_classify: cant find udp conn_t for ports : %x %x", 17020Sstevel@tonic-gate lport, fport)); 
17030Sstevel@tonic-gate break; 17040Sstevel@tonic-gate } 17050Sstevel@tonic-gate 17060Sstevel@tonic-gate return (NULL); 17070Sstevel@tonic-gate } 17080Sstevel@tonic-gate 17090Sstevel@tonic-gate conn_t * 17103448Sdh155122 ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid, 17113448Sdh155122 ip_stack_t *ipst) 17120Sstevel@tonic-gate { 17130Sstevel@tonic-gate ip6_t *ip6h; 17140Sstevel@tonic-gate connf_t *connfp, *bind_connfp; 17150Sstevel@tonic-gate uint16_t lport; 17160Sstevel@tonic-gate uint16_t fport; 17170Sstevel@tonic-gate tcph_t *tcph; 17180Sstevel@tonic-gate uint32_t ports; 17190Sstevel@tonic-gate conn_t *connp; 17200Sstevel@tonic-gate uint16_t *up; 17211676Sjpk boolean_t shared_addr; 17221676Sjpk boolean_t unlabeled; 17230Sstevel@tonic-gate 17240Sstevel@tonic-gate ip6h = (ip6_t *)mp->b_rptr; 17250Sstevel@tonic-gate 17260Sstevel@tonic-gate switch (protocol) { 17270Sstevel@tonic-gate case IPPROTO_TCP: 17280Sstevel@tonic-gate tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 17290Sstevel@tonic-gate up = (uint16_t *)tcph->th_lport; 17300Sstevel@tonic-gate ports = *(uint32_t *)up; 17310Sstevel@tonic-gate 17320Sstevel@tonic-gate connfp = 17333448Sdh155122 &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src, 17343448Sdh155122 ports, ipst)]; 17350Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 17360Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 17370Sstevel@tonic-gate connp = connp->conn_next) { 17388432SJonathan.Anderson@Sun.COM if ((IPCL_CONN_MATCH_V6(connp, protocol, 17398432SJonathan.Anderson@Sun.COM ip6h->ip6_src, ip6h->ip6_dst, ports)) && 17408432SJonathan.Anderson@Sun.COM (IPCL_ZONE_MATCH(connp, zoneid))) { 17410Sstevel@tonic-gate break; 17428432SJonathan.Anderson@Sun.COM } 17430Sstevel@tonic-gate } 17440Sstevel@tonic-gate 17450Sstevel@tonic-gate if (connp != NULL) { 17461676Sjpk /* 17471676Sjpk * We have a fully-bound TCP connection. 
17481676Sjpk * 17491676Sjpk * For labeled systems, there's no need to check the 17501676Sjpk * label here. It's known to be good as we checked 17511676Sjpk * before allowing the connection to become bound. 17521676Sjpk */ 17530Sstevel@tonic-gate CONN_INC_REF(connp); 17540Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 17550Sstevel@tonic-gate return (connp); 17560Sstevel@tonic-gate } 17570Sstevel@tonic-gate 17580Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 17590Sstevel@tonic-gate 17600Sstevel@tonic-gate lport = up[1]; 17611676Sjpk unlabeled = B_FALSE; 17621676Sjpk /* Cred can be null on IPv6 */ 17631676Sjpk if (is_system_labeled()) { 17641676Sjpk cred_t *cr = DB_CRED(mp); 17651676Sjpk 17661676Sjpk unlabeled = (cr != NULL && 17671676Sjpk crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0; 17681676Sjpk } 17691676Sjpk shared_addr = (zoneid == ALL_ZONES); 17701676Sjpk if (shared_addr) { 17713448Sdh155122 /* 17723448Sdh155122 * No need to handle exclusive-stack zones since 17733448Sdh155122 * ALL_ZONES only applies to the shared stack. 17743448Sdh155122 */ 17751676Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 17761676Sjpk /* 17771676Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 17781676Sjpk * ALL_ZONES. In that case, we assume it's SLP, and 17791676Sjpk * search for the zone based on the packet label. 17801676Sjpk * 17811676Sjpk * If there is such a zone, we prefer to find a 17821676Sjpk * connection in it. Otherwise, we look for a 17831676Sjpk * MAC-exempt connection in any zone whose label 17841676Sjpk * dominates the default label on the packet. 
17851676Sjpk */ 17861676Sjpk if (zoneid == ALL_ZONES) 17871676Sjpk zoneid = tsol_packet_to_zoneid(mp); 17881676Sjpk else 17891676Sjpk unlabeled = B_FALSE; 17901676Sjpk } 17911676Sjpk 17923448Sdh155122 bind_connfp = 17933448Sdh155122 &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 17940Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 17950Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 17960Sstevel@tonic-gate connp = connp->conn_next) { 17970Sstevel@tonic-gate if (IPCL_BIND_MATCH_V6(connp, protocol, 17980Sstevel@tonic-gate ip6h->ip6_dst, lport) && 17992263Ssommerfe (IPCL_ZONE_MATCH(connp, zoneid) || 18001676Sjpk (unlabeled && connp->conn_mac_exempt))) 18010Sstevel@tonic-gate break; 18020Sstevel@tonic-gate } 18030Sstevel@tonic-gate 18041676Sjpk if (connp != NULL && is_system_labeled() && 18051676Sjpk !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION, 18061676Sjpk shared_addr, connp)) { 18071676Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__tcp6, 18081676Sjpk char *, "connp(1) could not receive mp(2)", 18091676Sjpk conn_t *, connp, mblk_t *, mp); 18101676Sjpk connp = NULL; 18111676Sjpk } 18121676Sjpk 18130Sstevel@tonic-gate if (connp != NULL) { 18140Sstevel@tonic-gate /* Have a listner at least */ 18150Sstevel@tonic-gate CONN_INC_REF(connp); 18160Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 18170Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 18180Sstevel@tonic-gate ("ipcl_classify_v6: found listner " 18190Sstevel@tonic-gate "connp = %p\n", (void *)connp)); 18200Sstevel@tonic-gate 18210Sstevel@tonic-gate return (connp); 18220Sstevel@tonic-gate } 18230Sstevel@tonic-gate 18240Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 18250Sstevel@tonic-gate 18260Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 18270Sstevel@tonic-gate ("ipcl_classify_v6: couldn't classify mp = %p\n", 18280Sstevel@tonic-gate (void *)mp)); 18290Sstevel@tonic-gate break; 18300Sstevel@tonic-gate 18310Sstevel@tonic-gate case IPPROTO_UDP: 
18320Sstevel@tonic-gate up = (uint16_t *)&mp->b_rptr[hdr_len]; 18330Sstevel@tonic-gate lport = up[1]; 18341676Sjpk unlabeled = B_FALSE; 18351676Sjpk /* Cred can be null on IPv6 */ 18361676Sjpk if (is_system_labeled()) { 18371676Sjpk cred_t *cr = DB_CRED(mp); 18381676Sjpk 18391676Sjpk unlabeled = (cr != NULL && 18401676Sjpk crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0; 18411676Sjpk } 18421676Sjpk shared_addr = (zoneid == ALL_ZONES); 18431676Sjpk if (shared_addr) { 18443448Sdh155122 /* 18453448Sdh155122 * No need to handle exclusive-stack zones since 18463448Sdh155122 * ALL_ZONES only applies to the shared stack. 18473448Sdh155122 */ 18481676Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 18491676Sjpk /* 18501676Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 18511676Sjpk * ALL_ZONES. In that case, we assume it's SLP, and 18521676Sjpk * search for the zone based on the packet label. 18531676Sjpk * 18541676Sjpk * If there is such a zone, we prefer to find a 18551676Sjpk * connection in it. Otherwise, we look for a 18561676Sjpk * MAC-exempt connection in any zone whose label 18571676Sjpk * dominates the default label on the packet. 
18581676Sjpk */ 18591676Sjpk if (zoneid == ALL_ZONES) 18601676Sjpk zoneid = tsol_packet_to_zoneid(mp); 18611676Sjpk else 18621676Sjpk unlabeled = B_FALSE; 18631676Sjpk } 18641676Sjpk 18650Sstevel@tonic-gate fport = up[0]; 18660Sstevel@tonic-gate IPCL_DEBUG_LVL(512, ("ipcl_udp_classify_v6 %x %x", lport, 18670Sstevel@tonic-gate fport)); 18683448Sdh155122 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; 18690Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 18700Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 18710Sstevel@tonic-gate connp = connp->conn_next) { 18720Sstevel@tonic-gate if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst, 18730Sstevel@tonic-gate fport, ip6h->ip6_src) && 18742263Ssommerfe (IPCL_ZONE_MATCH(connp, zoneid) || 18751676Sjpk (unlabeled && connp->conn_mac_exempt))) 18760Sstevel@tonic-gate break; 18770Sstevel@tonic-gate } 18780Sstevel@tonic-gate 18791676Sjpk if (connp != NULL && is_system_labeled() && 18801676Sjpk !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION, 18811676Sjpk shared_addr, connp)) { 18821676Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__udp6, 18831676Sjpk char *, "connp(1) could not receive mp(2)", 18841676Sjpk conn_t *, connp, mblk_t *, mp); 18851676Sjpk connp = NULL; 18861676Sjpk } 18871676Sjpk 18880Sstevel@tonic-gate if (connp != NULL) { 18890Sstevel@tonic-gate CONN_INC_REF(connp); 18900Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 18910Sstevel@tonic-gate return (connp); 18920Sstevel@tonic-gate } 18930Sstevel@tonic-gate 18940Sstevel@tonic-gate /* 18950Sstevel@tonic-gate * We shouldn't come here for multicast/broadcast packets 18960Sstevel@tonic-gate */ 18970Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 18980Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 18990Sstevel@tonic-gate ("ipcl_classify_v6: cant find udp conn_t for ports : %x %x", 19000Sstevel@tonic-gate lport, fport)); 19010Sstevel@tonic-gate break; 19020Sstevel@tonic-gate } 19030Sstevel@tonic-gate 19040Sstevel@tonic-gate 
return (NULL); 19050Sstevel@tonic-gate } 19060Sstevel@tonic-gate 19070Sstevel@tonic-gate /* 19080Sstevel@tonic-gate * wrapper around ipcl_classify_(v4,v6) routines. 19090Sstevel@tonic-gate */ 19100Sstevel@tonic-gate conn_t * 19113448Sdh155122 ipcl_classify(mblk_t *mp, zoneid_t zoneid, ip_stack_t *ipst) 19120Sstevel@tonic-gate { 19130Sstevel@tonic-gate uint16_t hdr_len; 19140Sstevel@tonic-gate ipha_t *ipha; 19150Sstevel@tonic-gate uint8_t *nexthdrp; 19160Sstevel@tonic-gate 19170Sstevel@tonic-gate if (MBLKL(mp) < sizeof (ipha_t)) 19180Sstevel@tonic-gate return (NULL); 19190Sstevel@tonic-gate 19200Sstevel@tonic-gate switch (IPH_HDR_VERSION(mp->b_rptr)) { 19210Sstevel@tonic-gate case IPV4_VERSION: 19220Sstevel@tonic-gate ipha = (ipha_t *)mp->b_rptr; 19230Sstevel@tonic-gate hdr_len = IPH_HDR_LENGTH(ipha); 19240Sstevel@tonic-gate return (ipcl_classify_v4(mp, ipha->ipha_protocol, hdr_len, 19253448Sdh155122 zoneid, ipst)); 19260Sstevel@tonic-gate case IPV6_VERSION: 19270Sstevel@tonic-gate if (!ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr, 19280Sstevel@tonic-gate &hdr_len, &nexthdrp)) 19290Sstevel@tonic-gate return (NULL); 19300Sstevel@tonic-gate 19313448Sdh155122 return (ipcl_classify_v6(mp, *nexthdrp, hdr_len, zoneid, ipst)); 19320Sstevel@tonic-gate } 19330Sstevel@tonic-gate 19340Sstevel@tonic-gate return (NULL); 19350Sstevel@tonic-gate } 19360Sstevel@tonic-gate 19370Sstevel@tonic-gate conn_t * 19381676Sjpk ipcl_classify_raw(mblk_t *mp, uint8_t protocol, zoneid_t zoneid, 19393448Sdh155122 uint32_t ports, ipha_t *hdr, ip_stack_t *ipst) 19400Sstevel@tonic-gate { 19411676Sjpk connf_t *connfp; 19420Sstevel@tonic-gate conn_t *connp; 19430Sstevel@tonic-gate in_port_t lport; 19440Sstevel@tonic-gate int af; 19451676Sjpk boolean_t shared_addr; 19461676Sjpk boolean_t unlabeled; 19471676Sjpk const void *dst; 19480Sstevel@tonic-gate 19490Sstevel@tonic-gate lport = ((uint16_t *)&ports)[1]; 19501676Sjpk 19511676Sjpk unlabeled = B_FALSE; 19521676Sjpk /* Cred can be null on IPv6 */ 
19531676Sjpk if (is_system_labeled()) { 19541676Sjpk cred_t *cr = DB_CRED(mp); 19551676Sjpk 19561676Sjpk unlabeled = (cr != NULL && 19571676Sjpk crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0; 19581676Sjpk } 19591676Sjpk shared_addr = (zoneid == ALL_ZONES); 19601676Sjpk if (shared_addr) { 19613448Sdh155122 /* 19623448Sdh155122 * No need to handle exclusive-stack zones since ALL_ZONES 19633448Sdh155122 * only applies to the shared stack. 19643448Sdh155122 */ 19651676Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 19661676Sjpk /* 19671676Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 19681676Sjpk * ALL_ZONES. In that case, we assume it's SLP, and search for 19691676Sjpk * the zone based on the packet label. 19701676Sjpk * 19711676Sjpk * If there is such a zone, we prefer to find a connection in 19721676Sjpk * it. Otherwise, we look for a MAC-exempt connection in any 19731676Sjpk * zone whose label dominates the default label on the packet. 19741676Sjpk */ 19751676Sjpk if (zoneid == ALL_ZONES) 19761676Sjpk zoneid = tsol_packet_to_zoneid(mp); 19771676Sjpk else 19781676Sjpk unlabeled = B_FALSE; 19791676Sjpk } 19801676Sjpk 19810Sstevel@tonic-gate af = IPH_HDR_VERSION(hdr); 19821676Sjpk dst = af == IPV4_VERSION ? (const void *)&hdr->ipha_dst : 19831676Sjpk (const void *)&((ip6_t *)hdr)->ip6_dst; 19843448Sdh155122 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)]; 19850Sstevel@tonic-gate 19860Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 19870Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 19880Sstevel@tonic-gate connp = connp->conn_next) { 19890Sstevel@tonic-gate /* We don't allow v4 fallback for v6 raw socket. */ 19901676Sjpk if (af == (connp->conn_af_isv6 ? 
IPV4_VERSION : 19911676Sjpk IPV6_VERSION)) 19920Sstevel@tonic-gate continue; 19930Sstevel@tonic-gate if (connp->conn_fully_bound) { 19940Sstevel@tonic-gate if (af == IPV4_VERSION) { 19951676Sjpk if (!IPCL_CONN_MATCH(connp, protocol, 19961676Sjpk hdr->ipha_src, hdr->ipha_dst, ports)) 19971676Sjpk continue; 19980Sstevel@tonic-gate } else { 19991676Sjpk if (!IPCL_CONN_MATCH_V6(connp, protocol, 20000Sstevel@tonic-gate ((ip6_t *)hdr)->ip6_src, 20011676Sjpk ((ip6_t *)hdr)->ip6_dst, ports)) 20021676Sjpk continue; 20030Sstevel@tonic-gate } 20040Sstevel@tonic-gate } else { 20050Sstevel@tonic-gate if (af == IPV4_VERSION) { 20061676Sjpk if (!IPCL_BIND_MATCH(connp, protocol, 20071676Sjpk hdr->ipha_dst, lport)) 20081676Sjpk continue; 20090Sstevel@tonic-gate } else { 20101676Sjpk if (!IPCL_BIND_MATCH_V6(connp, protocol, 20111676Sjpk ((ip6_t *)hdr)->ip6_dst, lport)) 20121676Sjpk continue; 20130Sstevel@tonic-gate } 20140Sstevel@tonic-gate } 20151676Sjpk 20162263Ssommerfe if (IPCL_ZONE_MATCH(connp, zoneid) || 20171676Sjpk (unlabeled && connp->conn_mac_exempt)) 20181676Sjpk break; 20191676Sjpk } 20201676Sjpk /* 20211676Sjpk * If the connection is fully-bound and connection-oriented (TCP or 20221676Sjpk * SCTP), then we've already validated the remote system's label. 20231676Sjpk * There's no need to do it again for every packet. 20241676Sjpk */ 20251676Sjpk if (connp != NULL && is_system_labeled() && (!connp->conn_fully_bound || 20261676Sjpk !(connp->conn_flags & (IPCL_TCP|IPCL_SCTPCONN))) && 20271676Sjpk !tsol_receive_local(mp, dst, af, shared_addr, connp)) { 20281676Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__rawip, 20291676Sjpk char *, "connp(1) could not receive mp(2)", 20301676Sjpk conn_t *, connp, mblk_t *, mp); 20311676Sjpk connp = NULL; 20320Sstevel@tonic-gate } 2033409Skcpoon 2034409Skcpoon if (connp != NULL) 2035409Skcpoon goto found; 2036409Skcpoon mutex_exit(&connfp->connf_lock); 2037409Skcpoon 2038409Skcpoon /* Try to look for a wildcard match. 
*/ 20393448Sdh155122 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)]; 2040409Skcpoon mutex_enter(&connfp->connf_lock); 2041409Skcpoon for (connp = connfp->connf_head; connp != NULL; 2042409Skcpoon connp = connp->conn_next) { 2043409Skcpoon /* We don't allow v4 fallback for v6 raw socket. */ 2044409Skcpoon if ((af == (connp->conn_af_isv6 ? IPV4_VERSION : 20452263Ssommerfe IPV6_VERSION)) || !IPCL_ZONE_MATCH(connp, zoneid)) { 2046409Skcpoon continue; 2047409Skcpoon } 2048409Skcpoon if (af == IPV4_VERSION) { 2049409Skcpoon if (IPCL_RAW_MATCH(connp, protocol, hdr->ipha_dst)) 2050409Skcpoon break; 2051409Skcpoon } else { 2052409Skcpoon if (IPCL_RAW_MATCH_V6(connp, protocol, 2053409Skcpoon ((ip6_t *)hdr)->ip6_dst)) { 2054409Skcpoon break; 2055409Skcpoon } 2056409Skcpoon } 20570Sstevel@tonic-gate } 2058409Skcpoon 2059409Skcpoon if (connp != NULL) 2060409Skcpoon goto found; 2061409Skcpoon 20620Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 20630Sstevel@tonic-gate return (NULL); 2064409Skcpoon 2065409Skcpoon found: 2066409Skcpoon ASSERT(connp != NULL); 2067409Skcpoon CONN_INC_REF(connp); 2068409Skcpoon mutex_exit(&connfp->connf_lock); 2069409Skcpoon return (connp); 20700Sstevel@tonic-gate } 20710Sstevel@tonic-gate 20720Sstevel@tonic-gate /* ARGSUSED */ 20730Sstevel@tonic-gate static int 20745240Snordmark tcp_conn_constructor(void *buf, void *cdrarg, int kmflags) 20750Sstevel@tonic-gate { 20760Sstevel@tonic-gate itc_t *itc = (itc_t *)buf; 20770Sstevel@tonic-gate conn_t *connp = &itc->itc_conn; 20785240Snordmark tcp_t *tcp = (tcp_t *)&itc[1]; 20795240Snordmark 20805240Snordmark bzero(connp, sizeof (conn_t)); 20815240Snordmark bzero(tcp, sizeof (tcp_t)); 20825240Snordmark 20835240Snordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 20845240Snordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 20858348SEric.Yu@Sun.COM cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL); 20860Sstevel@tonic-gate tcp->tcp_timercache = 
tcp_timermp_alloc(KM_NOSLEEP); 20870Sstevel@tonic-gate connp->conn_tcp = tcp; 20880Sstevel@tonic-gate connp->conn_flags = IPCL_TCPCONN; 20890Sstevel@tonic-gate connp->conn_ulp = IPPROTO_TCP; 20900Sstevel@tonic-gate tcp->tcp_connp = connp; 20910Sstevel@tonic-gate return (0); 20920Sstevel@tonic-gate } 20930Sstevel@tonic-gate 20940Sstevel@tonic-gate /* ARGSUSED */ 20950Sstevel@tonic-gate static void 20965240Snordmark tcp_conn_destructor(void *buf, void *cdrarg) 20975240Snordmark { 20985240Snordmark itc_t *itc = (itc_t *)buf; 20995240Snordmark conn_t *connp = &itc->itc_conn; 21005240Snordmark tcp_t *tcp = (tcp_t *)&itc[1]; 21015240Snordmark 21025240Snordmark ASSERT(connp->conn_flags & IPCL_TCPCONN); 21035240Snordmark ASSERT(tcp->tcp_connp == connp); 21045240Snordmark ASSERT(connp->conn_tcp == tcp); 21055240Snordmark tcp_timermp_free(tcp); 21065240Snordmark mutex_destroy(&connp->conn_lock); 21075240Snordmark cv_destroy(&connp->conn_cv); 21088348SEric.Yu@Sun.COM cv_destroy(&connp->conn_sq_cv); 21095240Snordmark } 21105240Snordmark 21115240Snordmark /* ARGSUSED */ 21125240Snordmark static int 21135240Snordmark ip_conn_constructor(void *buf, void *cdrarg, int kmflags) 21145240Snordmark { 21155240Snordmark itc_t *itc = (itc_t *)buf; 21165240Snordmark conn_t *connp = &itc->itc_conn; 21175240Snordmark 21185240Snordmark bzero(connp, sizeof (conn_t)); 21195240Snordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 21205240Snordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 21215240Snordmark connp->conn_flags = IPCL_IPCCONN; 21225240Snordmark 21235240Snordmark return (0); 21245240Snordmark } 21255240Snordmark 21265240Snordmark /* ARGSUSED */ 21275240Snordmark static void 21285240Snordmark ip_conn_destructor(void *buf, void *cdrarg) 21295240Snordmark { 21305240Snordmark itc_t *itc = (itc_t *)buf; 21315240Snordmark conn_t *connp = &itc->itc_conn; 21325240Snordmark 21335240Snordmark ASSERT(connp->conn_flags & IPCL_IPCCONN); 21345240Snordmark 
ASSERT(connp->conn_priv == NULL); 21355240Snordmark mutex_destroy(&connp->conn_lock); 21365240Snordmark cv_destroy(&connp->conn_cv); 21375240Snordmark } 21385240Snordmark 21395240Snordmark /* ARGSUSED */ 21405240Snordmark static int 21415240Snordmark udp_conn_constructor(void *buf, void *cdrarg, int kmflags) 21425240Snordmark { 21435240Snordmark itc_t *itc = (itc_t *)buf; 21445240Snordmark conn_t *connp = &itc->itc_conn; 21455240Snordmark udp_t *udp = (udp_t *)&itc[1]; 21465240Snordmark 21475240Snordmark bzero(connp, sizeof (conn_t)); 21485240Snordmark bzero(udp, sizeof (udp_t)); 21495240Snordmark 21505240Snordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 21515240Snordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 21525240Snordmark connp->conn_udp = udp; 21535240Snordmark connp->conn_flags = IPCL_UDPCONN; 21545240Snordmark connp->conn_ulp = IPPROTO_UDP; 21555240Snordmark udp->udp_connp = connp; 21565240Snordmark return (0); 21575240Snordmark } 21585240Snordmark 21595240Snordmark /* ARGSUSED */ 21605240Snordmark static void 21615240Snordmark udp_conn_destructor(void *buf, void *cdrarg) 21625240Snordmark { 21635240Snordmark itc_t *itc = (itc_t *)buf; 21645240Snordmark conn_t *connp = &itc->itc_conn; 21655240Snordmark udp_t *udp = (udp_t *)&itc[1]; 21665240Snordmark 21675240Snordmark ASSERT(connp->conn_flags & IPCL_UDPCONN); 21685240Snordmark ASSERT(udp->udp_connp == connp); 21695240Snordmark ASSERT(connp->conn_udp == udp); 21705240Snordmark mutex_destroy(&connp->conn_lock); 21715240Snordmark cv_destroy(&connp->conn_cv); 21725240Snordmark } 21735240Snordmark 21745240Snordmark /* ARGSUSED */ 21755240Snordmark static int 21765240Snordmark rawip_conn_constructor(void *buf, void *cdrarg, int kmflags) 21770Sstevel@tonic-gate { 21785240Snordmark itc_t *itc = (itc_t *)buf; 21795240Snordmark conn_t *connp = &itc->itc_conn; 21805240Snordmark icmp_t *icmp = (icmp_t *)&itc[1]; 21815240Snordmark 21825240Snordmark bzero(connp, sizeof (conn_t)); 
21835240Snordmark bzero(icmp, sizeof (icmp_t)); 21845240Snordmark 21855240Snordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 21865240Snordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 21875240Snordmark connp->conn_icmp = icmp; 21885240Snordmark connp->conn_flags = IPCL_RAWIPCONN; 21895240Snordmark connp->conn_ulp = IPPROTO_ICMP; 21905240Snordmark icmp->icmp_connp = connp; 21915240Snordmark return (0); 21925240Snordmark } 21935240Snordmark 21945240Snordmark /* ARGSUSED */ 21955240Snordmark static void 21965240Snordmark rawip_conn_destructor(void *buf, void *cdrarg) 21975240Snordmark { 21985240Snordmark itc_t *itc = (itc_t *)buf; 21995240Snordmark conn_t *connp = &itc->itc_conn; 22005240Snordmark icmp_t *icmp = (icmp_t *)&itc[1]; 22015240Snordmark 22025240Snordmark ASSERT(connp->conn_flags & IPCL_RAWIPCONN); 22035240Snordmark ASSERT(icmp->icmp_connp == connp); 22045240Snordmark ASSERT(connp->conn_icmp == icmp); 22055240Snordmark mutex_destroy(&connp->conn_lock); 22065240Snordmark cv_destroy(&connp->conn_cv); 22075240Snordmark } 22085240Snordmark 22095240Snordmark /* ARGSUSED */ 22105240Snordmark static int 22115240Snordmark rts_conn_constructor(void *buf, void *cdrarg, int kmflags) 22125240Snordmark { 22135240Snordmark itc_t *itc = (itc_t *)buf; 22145240Snordmark conn_t *connp = &itc->itc_conn; 22155240Snordmark rts_t *rts = (rts_t *)&itc[1]; 22165240Snordmark 22175240Snordmark bzero(connp, sizeof (conn_t)); 22185240Snordmark bzero(rts, sizeof (rts_t)); 22195240Snordmark 22205240Snordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 22215240Snordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 22225240Snordmark connp->conn_rts = rts; 22235240Snordmark connp->conn_flags = IPCL_RTSCONN; 22245240Snordmark rts->rts_connp = connp; 22255240Snordmark return (0); 22265240Snordmark } 22275240Snordmark 22285240Snordmark /* ARGSUSED */ 22295240Snordmark static void 22305240Snordmark rts_conn_destructor(void *buf, void *cdrarg) 
22315240Snordmark { 22325240Snordmark itc_t *itc = (itc_t *)buf; 22335240Snordmark conn_t *connp = &itc->itc_conn; 22345240Snordmark rts_t *rts = (rts_t *)&itc[1]; 22355240Snordmark 22365240Snordmark ASSERT(connp->conn_flags & IPCL_RTSCONN); 22375240Snordmark ASSERT(rts->rts_connp == connp); 22385240Snordmark ASSERT(connp->conn_rts == rts); 22395240Snordmark mutex_destroy(&connp->conn_lock); 22405240Snordmark cv_destroy(&connp->conn_cv); 22415240Snordmark } 22425240Snordmark 22438348SEric.Yu@Sun.COM /* ARGSUSED */ 22448348SEric.Yu@Sun.COM int 22458348SEric.Yu@Sun.COM ip_helper_stream_constructor(void *buf, void *cdrarg, int kmflags) 22468348SEric.Yu@Sun.COM { 22478348SEric.Yu@Sun.COM int error; 22488348SEric.Yu@Sun.COM netstack_t *ns; 22498348SEric.Yu@Sun.COM int ret; 22508348SEric.Yu@Sun.COM tcp_stack_t *tcps; 22518348SEric.Yu@Sun.COM ip_helper_stream_info_t *ip_helper_str; 22528348SEric.Yu@Sun.COM ip_stack_t *ipst; 22538348SEric.Yu@Sun.COM 22548348SEric.Yu@Sun.COM ns = netstack_find_by_cred(kcred); 22558348SEric.Yu@Sun.COM ASSERT(ns != NULL); 22568348SEric.Yu@Sun.COM tcps = ns->netstack_tcp; 22578348SEric.Yu@Sun.COM ipst = ns->netstack_ip; 22588348SEric.Yu@Sun.COM ASSERT(tcps != NULL); 22598348SEric.Yu@Sun.COM ip_helper_str = (ip_helper_stream_info_t *)buf; 22608348SEric.Yu@Sun.COM 22618444SRao.Shoaib@Sun.COM do { 22628444SRao.Shoaib@Sun.COM error = ldi_open_by_name(DEV_IP, IP_HELPER_STR, kcred, 22638444SRao.Shoaib@Sun.COM &ip_helper_str->iphs_handle, ipst->ips_ldi_ident); 22648444SRao.Shoaib@Sun.COM } while (error == EINTR); 22658444SRao.Shoaib@Sun.COM 22668444SRao.Shoaib@Sun.COM if (error == 0) { 22678444SRao.Shoaib@Sun.COM do { 22688444SRao.Shoaib@Sun.COM error = ldi_ioctl( 22698444SRao.Shoaib@Sun.COM ip_helper_str->iphs_handle, SIOCSQPTR, 22708444SRao.Shoaib@Sun.COM (intptr_t)buf, FKIOCTL, kcred, &ret); 22718444SRao.Shoaib@Sun.COM } while (error == EINTR); 22728444SRao.Shoaib@Sun.COM 22738444SRao.Shoaib@Sun.COM if (error != 0) { 22748444SRao.Shoaib@Sun.COM 
(void) ldi_close( 22758444SRao.Shoaib@Sun.COM ip_helper_str->iphs_handle, 0, kcred); 22768444SRao.Shoaib@Sun.COM } 22778348SEric.Yu@Sun.COM } 22788444SRao.Shoaib@Sun.COM 22798348SEric.Yu@Sun.COM netstack_rele(ipst->ips_netstack); 22808444SRao.Shoaib@Sun.COM 22818348SEric.Yu@Sun.COM return (error); 22828348SEric.Yu@Sun.COM } 22838348SEric.Yu@Sun.COM 22848348SEric.Yu@Sun.COM /* ARGSUSED */ 22858348SEric.Yu@Sun.COM static void 22868348SEric.Yu@Sun.COM ip_helper_stream_destructor(void *buf, void *cdrarg) 22878348SEric.Yu@Sun.COM { 22888348SEric.Yu@Sun.COM ip_helper_stream_info_t *ip_helper_str = (ip_helper_stream_info_t *)buf; 22898348SEric.Yu@Sun.COM 22908444SRao.Shoaib@Sun.COM ip_helper_str->iphs_rq->q_ptr = 22918444SRao.Shoaib@Sun.COM ip_helper_str->iphs_wq->q_ptr = 22928444SRao.Shoaib@Sun.COM ip_helper_str->iphs_minfo; 22938444SRao.Shoaib@Sun.COM (void) ldi_close(ip_helper_str->iphs_handle, 0, kcred); 22948348SEric.Yu@Sun.COM } 22958348SEric.Yu@Sun.COM 22968348SEric.Yu@Sun.COM 22975240Snordmark /* 22985240Snordmark * Called as part of ipcl_conn_destroy to assert and clear any pointers 22995240Snordmark * in the conn_t. 
23005240Snordmark */ 23015240Snordmark void 23025240Snordmark ipcl_conn_cleanup(conn_t *connp) 23035240Snordmark { 23045240Snordmark ASSERT(connp->conn_ire_cache == NULL); 23055240Snordmark ASSERT(connp->conn_latch == NULL); 23065240Snordmark #ifdef notdef 23075240Snordmark ASSERT(connp->conn_rq == NULL); 23085240Snordmark ASSERT(connp->conn_wq == NULL); 23095240Snordmark #endif 23105240Snordmark ASSERT(connp->conn_cred == NULL); 23115240Snordmark ASSERT(connp->conn_g_fanout == NULL); 23125240Snordmark ASSERT(connp->conn_g_next == NULL); 23135240Snordmark ASSERT(connp->conn_g_prev == NULL); 23145240Snordmark ASSERT(connp->conn_policy == NULL); 23155240Snordmark ASSERT(connp->conn_fanout == NULL); 23165240Snordmark ASSERT(connp->conn_next == NULL); 23175240Snordmark ASSERT(connp->conn_prev == NULL); 23185240Snordmark #ifdef notdef 23195240Snordmark /* 23205240Snordmark * The ill and ipif pointers are not cleared before the conn_t 23215240Snordmark * goes away since they do not hold a reference on the ill/ipif. 23225240Snordmark * We should replace these pointers with ifindex/ipaddr_t to 23235240Snordmark * make the code less complex. 
23245240Snordmark */ 23255240Snordmark ASSERT(connp->conn_outgoing_ill == NULL); 23265240Snordmark ASSERT(connp->conn_incoming_ill == NULL); 23275240Snordmark ASSERT(connp->conn_multicast_ipif == NULL); 23285240Snordmark ASSERT(connp->conn_multicast_ill == NULL); 23295240Snordmark #endif 23305240Snordmark ASSERT(connp->conn_oper_pending_ill == NULL); 23315240Snordmark ASSERT(connp->conn_ilg == NULL); 23325240Snordmark ASSERT(connp->conn_drain_next == NULL); 23335240Snordmark ASSERT(connp->conn_drain_prev == NULL); 23345277Snordmark #ifdef notdef 23355277Snordmark /* conn_idl is not cleared when removed from idl list */ 23365240Snordmark ASSERT(connp->conn_idl == NULL); 23375277Snordmark #endif 23385240Snordmark ASSERT(connp->conn_ipsec_opt_mp == NULL); 23395240Snordmark ASSERT(connp->conn_peercred == NULL); 23405240Snordmark ASSERT(connp->conn_netstack == NULL); 23415240Snordmark 23428348SEric.Yu@Sun.COM ASSERT(connp->conn_helper_info == NULL); 23435240Snordmark /* Clear out the conn_t fields that are not preserved */ 23445240Snordmark bzero(&connp->conn_start_clr, 23455240Snordmark sizeof (conn_t) - 23465240Snordmark ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp)); 23470Sstevel@tonic-gate } 23480Sstevel@tonic-gate 23490Sstevel@tonic-gate /* 23500Sstevel@tonic-gate * All conns are inserted in a global multi-list for the benefit of 23510Sstevel@tonic-gate * walkers. The walk is guaranteed to walk all open conns at the time 23520Sstevel@tonic-gate * of the start of the walk exactly once. This property is needed to 23530Sstevel@tonic-gate * achieve some cleanups during unplumb of interfaces. This is achieved 23540Sstevel@tonic-gate * as follows. 23550Sstevel@tonic-gate * 23560Sstevel@tonic-gate * ipcl_conn_create and ipcl_conn_destroy are the only functions that 23570Sstevel@tonic-gate * call the insert and delete functions below at creation and deletion 23580Sstevel@tonic-gate * time respectively. 
The conn never moves or changes its position in this 23590Sstevel@tonic-gate * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt 23600Sstevel@tonic-gate * won't increase due to walkers, once the conn deletion has started. Note 23610Sstevel@tonic-gate * that we can't remove the conn from the global list and then wait for 23620Sstevel@tonic-gate * the refcnt to drop to zero, since walkers would then see a truncated 23630Sstevel@tonic-gate * list. CONN_INCIPIENT ensures that walkers don't start looking at 23640Sstevel@tonic-gate * conns until ip_open is ready to make them globally visible. 23650Sstevel@tonic-gate * The global round robin multi-list locks are held only to get the 23660Sstevel@tonic-gate * next member/insertion/deletion and contention should be negligible 23670Sstevel@tonic-gate * if the multi-list is much greater than the number of cpus. 23680Sstevel@tonic-gate */ 23690Sstevel@tonic-gate void 23700Sstevel@tonic-gate ipcl_globalhash_insert(conn_t *connp) 23710Sstevel@tonic-gate { 23720Sstevel@tonic-gate int index; 23733448Sdh155122 struct connf_s *connfp; 23743448Sdh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 23750Sstevel@tonic-gate 23760Sstevel@tonic-gate /* 23770Sstevel@tonic-gate * No need for atomic here. Approximate even distribution 23780Sstevel@tonic-gate * in the global lists is sufficient. 23790Sstevel@tonic-gate */ 23803448Sdh155122 ipst->ips_conn_g_index++; 23813448Sdh155122 index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1); 23820Sstevel@tonic-gate 23830Sstevel@tonic-gate connp->conn_g_prev = NULL; 23840Sstevel@tonic-gate /* 23850Sstevel@tonic-gate * Mark as INCIPIENT, so that walkers will ignore this 23860Sstevel@tonic-gate * for now, till ip_open is ready to make it visible globally. 
23870Sstevel@tonic-gate */ 23880Sstevel@tonic-gate connp->conn_state_flags |= CONN_INCIPIENT; 23890Sstevel@tonic-gate 23903448Sdh155122 connfp = &ipst->ips_ipcl_globalhash_fanout[index]; 23910Sstevel@tonic-gate /* Insert at the head of the list */ 23923448Sdh155122 mutex_enter(&connfp->connf_lock); 23933448Sdh155122 connp->conn_g_next = connfp->connf_head; 23940Sstevel@tonic-gate if (connp->conn_g_next != NULL) 23950Sstevel@tonic-gate connp->conn_g_next->conn_g_prev = connp; 23963448Sdh155122 connfp->connf_head = connp; 23970Sstevel@tonic-gate 23980Sstevel@tonic-gate /* The fanout bucket this conn points to */ 23993448Sdh155122 connp->conn_g_fanout = connfp; 24000Sstevel@tonic-gate 24013448Sdh155122 mutex_exit(&connfp->connf_lock); 24020Sstevel@tonic-gate } 24030Sstevel@tonic-gate 24040Sstevel@tonic-gate void 24050Sstevel@tonic-gate ipcl_globalhash_remove(conn_t *connp) 24060Sstevel@tonic-gate { 24073448Sdh155122 struct connf_s *connfp; 24083448Sdh155122 24090Sstevel@tonic-gate /* 24100Sstevel@tonic-gate * We were never inserted in the global multi list. 24110Sstevel@tonic-gate * IPCL_NONE variety is never inserted in the global multilist 24120Sstevel@tonic-gate * since it is presumed to not need any cleanup and is transient. 
24130Sstevel@tonic-gate */ 24140Sstevel@tonic-gate if (connp->conn_g_fanout == NULL) 24150Sstevel@tonic-gate return; 24160Sstevel@tonic-gate 24173448Sdh155122 connfp = connp->conn_g_fanout; 24183448Sdh155122 mutex_enter(&connfp->connf_lock); 24190Sstevel@tonic-gate if (connp->conn_g_prev != NULL) 24200Sstevel@tonic-gate connp->conn_g_prev->conn_g_next = connp->conn_g_next; 24210Sstevel@tonic-gate else 24223448Sdh155122 connfp->connf_head = connp->conn_g_next; 24230Sstevel@tonic-gate if (connp->conn_g_next != NULL) 24240Sstevel@tonic-gate connp->conn_g_next->conn_g_prev = connp->conn_g_prev; 24253448Sdh155122 mutex_exit(&connfp->connf_lock); 24260Sstevel@tonic-gate 24270Sstevel@tonic-gate /* Better to stumble on a null pointer than to corrupt memory */ 24280Sstevel@tonic-gate connp->conn_g_next = NULL; 24290Sstevel@tonic-gate connp->conn_g_prev = NULL; 24305240Snordmark connp->conn_g_fanout = NULL; 24310Sstevel@tonic-gate } 24320Sstevel@tonic-gate 24330Sstevel@tonic-gate /* 24340Sstevel@tonic-gate * Walk the list of all conn_t's in the system, calling the function provided 24350Sstevel@tonic-gate * with the specified argument for each. 24360Sstevel@tonic-gate * Applies to both IPv4 and IPv6. 24370Sstevel@tonic-gate * 24380Sstevel@tonic-gate * IPCs may hold pointers to ipif/ill. To guard against stale pointers 24390Sstevel@tonic-gate * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is 24400Sstevel@tonic-gate * unplumbed or removed. New conn_t's that are created while we are walking 24410Sstevel@tonic-gate * may be missed by this walk, because they are not necessarily inserted 24420Sstevel@tonic-gate * at the tail of the list. They are new conn_t's and thus don't have any 24430Sstevel@tonic-gate * stale pointers. The CONN_CLOSING flag ensures that no new reference 24440Sstevel@tonic-gate * is created to the struct that is going away. 
24450Sstevel@tonic-gate */ 24460Sstevel@tonic-gate void 24473448Sdh155122 ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst) 24480Sstevel@tonic-gate { 24490Sstevel@tonic-gate int i; 24500Sstevel@tonic-gate conn_t *connp; 24510Sstevel@tonic-gate conn_t *prev_connp; 24520Sstevel@tonic-gate 24530Sstevel@tonic-gate for (i = 0; i < CONN_G_HASH_SIZE; i++) { 24543448Sdh155122 mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 24550Sstevel@tonic-gate prev_connp = NULL; 24563448Sdh155122 connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head; 24570Sstevel@tonic-gate while (connp != NULL) { 24580Sstevel@tonic-gate mutex_enter(&connp->conn_lock); 24590Sstevel@tonic-gate if (connp->conn_state_flags & 24600Sstevel@tonic-gate (CONN_CONDEMNED | CONN_INCIPIENT)) { 24610Sstevel@tonic-gate mutex_exit(&connp->conn_lock); 24620Sstevel@tonic-gate connp = connp->conn_g_next; 24630Sstevel@tonic-gate continue; 24640Sstevel@tonic-gate } 24650Sstevel@tonic-gate CONN_INC_REF_LOCKED(connp); 24660Sstevel@tonic-gate mutex_exit(&connp->conn_lock); 24673448Sdh155122 mutex_exit( 24683448Sdh155122 &ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 24690Sstevel@tonic-gate (*func)(connp, arg); 24700Sstevel@tonic-gate if (prev_connp != NULL) 24710Sstevel@tonic-gate CONN_DEC_REF(prev_connp); 24723448Sdh155122 mutex_enter( 24733448Sdh155122 &ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 24740Sstevel@tonic-gate prev_connp = connp; 24750Sstevel@tonic-gate connp = connp->conn_g_next; 24760Sstevel@tonic-gate } 24773448Sdh155122 mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 24780Sstevel@tonic-gate if (prev_connp != NULL) 24790Sstevel@tonic-gate CONN_DEC_REF(prev_connp); 24800Sstevel@tonic-gate } 24810Sstevel@tonic-gate } 24820Sstevel@tonic-gate 24830Sstevel@tonic-gate /* 24840Sstevel@tonic-gate * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on 24850Sstevel@tonic-gate * the {src, dst, lport, fport} quadruplet. 
Returns with conn reference 24860Sstevel@tonic-gate * held; caller must call CONN_DEC_REF. Only checks for connected entries 24872323Sethindra * (peer tcp in ESTABLISHED state). 24880Sstevel@tonic-gate */ 24890Sstevel@tonic-gate conn_t * 24903448Sdh155122 ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcph_t *tcph, 24913448Sdh155122 ip_stack_t *ipst) 24920Sstevel@tonic-gate { 24930Sstevel@tonic-gate uint32_t ports; 24940Sstevel@tonic-gate uint16_t *pports = (uint16_t *)&ports; 24950Sstevel@tonic-gate connf_t *connfp; 24960Sstevel@tonic-gate conn_t *tconnp; 24970Sstevel@tonic-gate boolean_t zone_chk; 24980Sstevel@tonic-gate 24990Sstevel@tonic-gate /* 25000Sstevel@tonic-gate * If either the source of destination address is loopback, then 25010Sstevel@tonic-gate * both endpoints must be in the same Zone. Otherwise, both of 25020Sstevel@tonic-gate * the addresses are system-wide unique (tcp is in ESTABLISHED 25030Sstevel@tonic-gate * state) and the endpoints may reside in different Zones. 
25040Sstevel@tonic-gate */ 25050Sstevel@tonic-gate zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) || 25060Sstevel@tonic-gate ipha->ipha_dst == htonl(INADDR_LOOPBACK)); 25070Sstevel@tonic-gate 25080Sstevel@tonic-gate bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 25090Sstevel@tonic-gate bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 25100Sstevel@tonic-gate 25113448Sdh155122 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, 25123448Sdh155122 ports, ipst)]; 25130Sstevel@tonic-gate 25140Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 25150Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 25160Sstevel@tonic-gate tconnp = tconnp->conn_next) { 25170Sstevel@tonic-gate 25180Sstevel@tonic-gate if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 25190Sstevel@tonic-gate ipha->ipha_dst, ipha->ipha_src, ports) && 25202323Sethindra tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED && 25210Sstevel@tonic-gate (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 25220Sstevel@tonic-gate 25230Sstevel@tonic-gate ASSERT(tconnp != connp); 25240Sstevel@tonic-gate CONN_INC_REF(tconnp); 25250Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 25260Sstevel@tonic-gate return (tconnp); 25270Sstevel@tonic-gate } 25280Sstevel@tonic-gate } 25290Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 25300Sstevel@tonic-gate return (NULL); 25310Sstevel@tonic-gate } 25320Sstevel@tonic-gate 25330Sstevel@tonic-gate /* 25340Sstevel@tonic-gate * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on 25350Sstevel@tonic-gate * the {src, dst, lport, fport} quadruplet. Returns with conn reference 25360Sstevel@tonic-gate * held; caller must call CONN_DEC_REF. Only checks for connected entries 25372323Sethindra * (peer tcp in ESTABLISHED state). 
25380Sstevel@tonic-gate */ 25390Sstevel@tonic-gate conn_t * 25403448Sdh155122 ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcph_t *tcph, 25413448Sdh155122 ip_stack_t *ipst) 25420Sstevel@tonic-gate { 25430Sstevel@tonic-gate uint32_t ports; 25440Sstevel@tonic-gate uint16_t *pports = (uint16_t *)&ports; 25450Sstevel@tonic-gate connf_t *connfp; 25460Sstevel@tonic-gate conn_t *tconnp; 25470Sstevel@tonic-gate boolean_t zone_chk; 25480Sstevel@tonic-gate 25490Sstevel@tonic-gate /* 25500Sstevel@tonic-gate * If either the source of destination address is loopback, then 25510Sstevel@tonic-gate * both endpoints must be in the same Zone. Otherwise, both of 25520Sstevel@tonic-gate * the addresses are system-wide unique (tcp is in ESTABLISHED 25530Sstevel@tonic-gate * state) and the endpoints may reside in different Zones. We 25540Sstevel@tonic-gate * don't do Zone check for link local address(es) because the 25550Sstevel@tonic-gate * current Zone implementation treats each link local address as 25560Sstevel@tonic-gate * being unique per system node, i.e. they belong to global Zone. 
25570Sstevel@tonic-gate */ 25580Sstevel@tonic-gate zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) || 25590Sstevel@tonic-gate IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)); 25600Sstevel@tonic-gate 25610Sstevel@tonic-gate bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 25620Sstevel@tonic-gate bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 25630Sstevel@tonic-gate 25643448Sdh155122 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, 25653448Sdh155122 ports, ipst)]; 25660Sstevel@tonic-gate 25670Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 25680Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 25690Sstevel@tonic-gate tconnp = tconnp->conn_next) { 25700Sstevel@tonic-gate 25710Sstevel@tonic-gate /* We skip tcp_bound_if check here as this is loopback tcp */ 25720Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 25730Sstevel@tonic-gate ip6h->ip6_dst, ip6h->ip6_src, ports) && 25742323Sethindra tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED && 25750Sstevel@tonic-gate (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 25760Sstevel@tonic-gate 25770Sstevel@tonic-gate ASSERT(tconnp != connp); 25780Sstevel@tonic-gate CONN_INC_REF(tconnp); 25790Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 25800Sstevel@tonic-gate return (tconnp); 25810Sstevel@tonic-gate } 25820Sstevel@tonic-gate } 25830Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 25840Sstevel@tonic-gate return (NULL); 25850Sstevel@tonic-gate } 25860Sstevel@tonic-gate 25870Sstevel@tonic-gate /* 25880Sstevel@tonic-gate * Find an exact {src, dst, lport, fport} match for a bounced datagram. 25890Sstevel@tonic-gate * Returns with conn reference held. Caller must call CONN_DEC_REF. 25900Sstevel@tonic-gate * Only checks for connected entries i.e. no INADDR_ANY checks. 
25910Sstevel@tonic-gate */ 25920Sstevel@tonic-gate conn_t * 25933448Sdh155122 ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcph_t *tcph, int min_state, 25943448Sdh155122 ip_stack_t *ipst) 25950Sstevel@tonic-gate { 25960Sstevel@tonic-gate uint32_t ports; 25970Sstevel@tonic-gate uint16_t *pports; 25980Sstevel@tonic-gate connf_t *connfp; 25990Sstevel@tonic-gate conn_t *tconnp; 26000Sstevel@tonic-gate 26010Sstevel@tonic-gate pports = (uint16_t *)&ports; 26020Sstevel@tonic-gate bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 26030Sstevel@tonic-gate bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 26040Sstevel@tonic-gate 26053448Sdh155122 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, 26064691Skcpoon ports, ipst)]; 26070Sstevel@tonic-gate 26080Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 26090Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 26100Sstevel@tonic-gate tconnp = tconnp->conn_next) { 26110Sstevel@tonic-gate 26120Sstevel@tonic-gate if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 26130Sstevel@tonic-gate ipha->ipha_dst, ipha->ipha_src, ports) && 26140Sstevel@tonic-gate tconnp->conn_tcp->tcp_state >= min_state) { 26150Sstevel@tonic-gate 26160Sstevel@tonic-gate CONN_INC_REF(tconnp); 26170Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 26180Sstevel@tonic-gate return (tconnp); 26190Sstevel@tonic-gate } 26200Sstevel@tonic-gate } 26210Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 26220Sstevel@tonic-gate return (NULL); 26230Sstevel@tonic-gate } 26240Sstevel@tonic-gate 26250Sstevel@tonic-gate /* 26260Sstevel@tonic-gate * Find an exact {src, dst, lport, fport} match for a bounced datagram. 26270Sstevel@tonic-gate * Returns with conn reference held. Caller must call CONN_DEC_REF. 26280Sstevel@tonic-gate * Only checks for connected entries i.e. no INADDR_ANY checks. 26290Sstevel@tonic-gate * Match on ifindex in addition to addresses. 
26300Sstevel@tonic-gate */ 26310Sstevel@tonic-gate conn_t * 26320Sstevel@tonic-gate ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state, 26333448Sdh155122 uint_t ifindex, ip_stack_t *ipst) 26340Sstevel@tonic-gate { 26350Sstevel@tonic-gate tcp_t *tcp; 26360Sstevel@tonic-gate uint32_t ports; 26370Sstevel@tonic-gate uint16_t *pports; 26380Sstevel@tonic-gate connf_t *connfp; 26390Sstevel@tonic-gate conn_t *tconnp; 26400Sstevel@tonic-gate 26410Sstevel@tonic-gate pports = (uint16_t *)&ports; 26420Sstevel@tonic-gate pports[0] = tcpha->tha_fport; 26430Sstevel@tonic-gate pports[1] = tcpha->tha_lport; 26440Sstevel@tonic-gate 26453448Sdh155122 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, 26464691Skcpoon ports, ipst)]; 26470Sstevel@tonic-gate 26480Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 26490Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 26500Sstevel@tonic-gate tconnp = tconnp->conn_next) { 26510Sstevel@tonic-gate 26520Sstevel@tonic-gate tcp = tconnp->conn_tcp; 26530Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 26540Sstevel@tonic-gate ip6h->ip6_dst, ip6h->ip6_src, ports) && 26550Sstevel@tonic-gate tcp->tcp_state >= min_state && 26560Sstevel@tonic-gate (tcp->tcp_bound_if == 0 || 26570Sstevel@tonic-gate tcp->tcp_bound_if == ifindex)) { 26580Sstevel@tonic-gate 26590Sstevel@tonic-gate CONN_INC_REF(tconnp); 26600Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 26610Sstevel@tonic-gate return (tconnp); 26620Sstevel@tonic-gate } 26630Sstevel@tonic-gate } 26640Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 26650Sstevel@tonic-gate return (NULL); 26660Sstevel@tonic-gate } 26670Sstevel@tonic-gate 26680Sstevel@tonic-gate /* 26691676Sjpk * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate 26701676Sjpk * a listener when changing state. 
26710Sstevel@tonic-gate */ 26720Sstevel@tonic-gate conn_t * 26733448Sdh155122 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid, 26743448Sdh155122 ip_stack_t *ipst) 26750Sstevel@tonic-gate { 26760Sstevel@tonic-gate connf_t *bind_connfp; 26770Sstevel@tonic-gate conn_t *connp; 26780Sstevel@tonic-gate tcp_t *tcp; 26790Sstevel@tonic-gate 26800Sstevel@tonic-gate /* 26810Sstevel@tonic-gate * Avoid false matches for packets sent to an IP destination of 26820Sstevel@tonic-gate * all zeros. 26830Sstevel@tonic-gate */ 26840Sstevel@tonic-gate if (laddr == 0) 26850Sstevel@tonic-gate return (NULL); 26860Sstevel@tonic-gate 26871676Sjpk ASSERT(zoneid != ALL_ZONES); 26881676Sjpk 26893448Sdh155122 bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 26900Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 26910Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 26920Sstevel@tonic-gate connp = connp->conn_next) { 26930Sstevel@tonic-gate tcp = connp->conn_tcp; 26940Sstevel@tonic-gate if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) && 26952263Ssommerfe IPCL_ZONE_MATCH(connp, zoneid) && 26960Sstevel@tonic-gate (tcp->tcp_listener == NULL)) { 26970Sstevel@tonic-gate CONN_INC_REF(connp); 26980Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 26990Sstevel@tonic-gate return (connp); 27000Sstevel@tonic-gate } 27010Sstevel@tonic-gate } 27020Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 27030Sstevel@tonic-gate return (NULL); 27040Sstevel@tonic-gate } 27050Sstevel@tonic-gate 27061676Sjpk /* 27071676Sjpk * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate 27081676Sjpk * a listener when changing state. 
27091676Sjpk */ 27100Sstevel@tonic-gate conn_t * 27110Sstevel@tonic-gate ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex, 27123448Sdh155122 zoneid_t zoneid, ip_stack_t *ipst) 27130Sstevel@tonic-gate { 27140Sstevel@tonic-gate connf_t *bind_connfp; 27150Sstevel@tonic-gate conn_t *connp = NULL; 27160Sstevel@tonic-gate tcp_t *tcp; 27170Sstevel@tonic-gate 27180Sstevel@tonic-gate /* 27190Sstevel@tonic-gate * Avoid false matches for packets sent to an IP destination of 27200Sstevel@tonic-gate * all zeros. 27210Sstevel@tonic-gate */ 27220Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED(laddr)) 27230Sstevel@tonic-gate return (NULL); 27240Sstevel@tonic-gate 27251676Sjpk ASSERT(zoneid != ALL_ZONES); 27260Sstevel@tonic-gate 27273448Sdh155122 bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 27280Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 27290Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 27300Sstevel@tonic-gate connp = connp->conn_next) { 27310Sstevel@tonic-gate tcp = connp->conn_tcp; 27320Sstevel@tonic-gate if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) && 27332263Ssommerfe IPCL_ZONE_MATCH(connp, zoneid) && 27340Sstevel@tonic-gate (tcp->tcp_bound_if == 0 || 27350Sstevel@tonic-gate tcp->tcp_bound_if == ifindex) && 27360Sstevel@tonic-gate tcp->tcp_listener == NULL) { 27370Sstevel@tonic-gate CONN_INC_REF(connp); 27380Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 27390Sstevel@tonic-gate return (connp); 27400Sstevel@tonic-gate } 27410Sstevel@tonic-gate } 27420Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 27430Sstevel@tonic-gate return (NULL); 27440Sstevel@tonic-gate } 27450Sstevel@tonic-gate 2746741Smasputra /* 2747741Smasputra * ipcl_get_next_conn 2748741Smasputra * get the next entry in the conn global list 2749741Smasputra * and put a reference on the next_conn. 2750741Smasputra * decrement the reference on the current conn. 
2751741Smasputra * 2752741Smasputra * This is an iterator based walker function that also provides for 2753741Smasputra * some selection by the caller. It walks through the conn_hash bucket 2754741Smasputra * searching for the next valid connp in the list, and selects connections 2755741Smasputra * that are neither closed nor condemned. It also REFHOLDS the conn 2756741Smasputra * thus ensuring that the conn exists when the caller uses the conn. 2757741Smasputra */ 2758741Smasputra conn_t * 2759741Smasputra ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags) 2760741Smasputra { 2761741Smasputra conn_t *next_connp; 2762741Smasputra 2763741Smasputra if (connfp == NULL) 2764741Smasputra return (NULL); 2765741Smasputra 2766741Smasputra mutex_enter(&connfp->connf_lock); 2767741Smasputra 2768741Smasputra next_connp = (connp == NULL) ? 2769741Smasputra connfp->connf_head : connp->conn_g_next; 2770741Smasputra 2771741Smasputra while (next_connp != NULL) { 2772741Smasputra mutex_enter(&next_connp->conn_lock); 2773741Smasputra if (!(next_connp->conn_flags & conn_flags) || 2774741Smasputra (next_connp->conn_state_flags & 2775741Smasputra (CONN_CONDEMNED | CONN_INCIPIENT))) { 2776741Smasputra /* 2777741Smasputra * This conn has been condemned or 2778741Smasputra * is closing, or the flags don't match 2779741Smasputra */ 2780741Smasputra mutex_exit(&next_connp->conn_lock); 2781741Smasputra next_connp = next_connp->conn_g_next; 2782741Smasputra continue; 2783741Smasputra } 2784741Smasputra CONN_INC_REF_LOCKED(next_connp); 2785741Smasputra mutex_exit(&next_connp->conn_lock); 2786741Smasputra break; 2787741Smasputra } 2788741Smasputra 2789741Smasputra mutex_exit(&connfp->connf_lock); 2790741Smasputra 2791741Smasputra if (connp != NULL) 2792741Smasputra CONN_DEC_REF(connp); 2793741Smasputra 2794741Smasputra return (next_connp); 2795741Smasputra } 2796741Smasputra 27970Sstevel@tonic-gate #ifdef CONN_DEBUG 27980Sstevel@tonic-gate /* 27990Sstevel@tonic-gate * Trace 
of the last NBUF refhold/refrele 28000Sstevel@tonic-gate */ 28010Sstevel@tonic-gate int 28020Sstevel@tonic-gate conn_trace_ref(conn_t *connp) 28030Sstevel@tonic-gate { 28040Sstevel@tonic-gate int last; 28050Sstevel@tonic-gate conn_trace_t *ctb; 28060Sstevel@tonic-gate 28070Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock)); 28080Sstevel@tonic-gate last = connp->conn_trace_last; 28090Sstevel@tonic-gate last++; 28100Sstevel@tonic-gate if (last == CONN_TRACE_MAX) 28110Sstevel@tonic-gate last = 0; 28120Sstevel@tonic-gate 28130Sstevel@tonic-gate ctb = &connp->conn_trace_buf[last]; 28145023Scarlsonj ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH); 28150Sstevel@tonic-gate connp->conn_trace_last = last; 28160Sstevel@tonic-gate return (1); 28170Sstevel@tonic-gate } 28180Sstevel@tonic-gate 28190Sstevel@tonic-gate int 28200Sstevel@tonic-gate conn_untrace_ref(conn_t *connp) 28210Sstevel@tonic-gate { 28220Sstevel@tonic-gate int last; 28230Sstevel@tonic-gate conn_trace_t *ctb; 28240Sstevel@tonic-gate 28250Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock)); 28260Sstevel@tonic-gate last = connp->conn_trace_last; 28270Sstevel@tonic-gate last++; 28280Sstevel@tonic-gate if (last == CONN_TRACE_MAX) 28290Sstevel@tonic-gate last = 0; 28300Sstevel@tonic-gate 28310Sstevel@tonic-gate ctb = &connp->conn_trace_buf[last]; 28325023Scarlsonj ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH); 28330Sstevel@tonic-gate connp->conn_trace_last = last; 28340Sstevel@tonic-gate return (1); 28350Sstevel@tonic-gate } 28360Sstevel@tonic-gate #endif 2837