10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 51503Sericheng * Common Development and Distribution License (the "License"). 61503Sericheng * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 228485SPeter.Memishian@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate 260Sstevel@tonic-gate /* 270Sstevel@tonic-gate * IP PACKET CLASSIFIER 280Sstevel@tonic-gate * 290Sstevel@tonic-gate * The IP packet classifier provides mapping between IP packets and persistent 300Sstevel@tonic-gate * connection state for connection-oriented protocols. It also provides 310Sstevel@tonic-gate * interface for managing connection states. 
320Sstevel@tonic-gate * 330Sstevel@tonic-gate * The connection state is kept in conn_t data structure and contains, among 340Sstevel@tonic-gate * other things: 350Sstevel@tonic-gate * 360Sstevel@tonic-gate * o local/remote address and ports 370Sstevel@tonic-gate * o Transport protocol 380Sstevel@tonic-gate * o squeue for the connection (for TCP only) 390Sstevel@tonic-gate * o reference counter 400Sstevel@tonic-gate * o Connection state 410Sstevel@tonic-gate * o hash table linkage 420Sstevel@tonic-gate * o interface/ire information 430Sstevel@tonic-gate * o credentials 440Sstevel@tonic-gate * o ipsec policy 450Sstevel@tonic-gate * o send and receive functions. 460Sstevel@tonic-gate * o mutex lock. 470Sstevel@tonic-gate * 480Sstevel@tonic-gate * Connections use a reference counting scheme. They are freed when the 490Sstevel@tonic-gate * reference counter drops to zero. A reference is incremented when connection 500Sstevel@tonic-gate * is placed in a list or table, when incoming packet for the connection arrives 510Sstevel@tonic-gate * and when connection is processed via squeue (squeue processing may be 520Sstevel@tonic-gate * asynchronous and the reference protects the connection from being destroyed 530Sstevel@tonic-gate * before its processing is finished). 540Sstevel@tonic-gate * 5511042SErik.Nordmark@Sun.COM * conn_recv is used to pass up packets to the ULP. 5611042SErik.Nordmark@Sun.COM * For TCP conn_recv changes. It is tcp_input_listener_unbound initially for 5711042SErik.Nordmark@Sun.COM * a listener, and changes to tcp_input_listener as the listener has picked a 5811042SErik.Nordmark@Sun.COM * good squeue. For other cases it is set to tcp_input_data. 5911042SErik.Nordmark@Sun.COM * 6011042SErik.Nordmark@Sun.COM * conn_recvicmp is used to pass up ICMP errors to the ULP. 
610Sstevel@tonic-gate * 620Sstevel@tonic-gate * Classifier uses several hash tables: 630Sstevel@tonic-gate * 640Sstevel@tonic-gate * ipcl_conn_fanout: contains all TCP connections in CONNECTED state 650Sstevel@tonic-gate * ipcl_bind_fanout: contains all connections in BOUND state 660Sstevel@tonic-gate * ipcl_proto_fanout: IPv4 protocol fanout 670Sstevel@tonic-gate * ipcl_proto_fanout_v6: IPv6 protocol fanout 680Sstevel@tonic-gate * ipcl_udp_fanout: contains all UDP connections 6910616SSebastien.Roy@Sun.COM * ipcl_iptun_fanout: contains all IP tunnel connections 700Sstevel@tonic-gate * ipcl_globalhash_fanout: contains all connections 710Sstevel@tonic-gate * 720Sstevel@tonic-gate * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering) 730Sstevel@tonic-gate * which need to view all existing connections. 740Sstevel@tonic-gate * 750Sstevel@tonic-gate * All tables are protected by per-bucket locks. When both per-bucket lock and 760Sstevel@tonic-gate * connection lock need to be held, the per-bucket lock should be acquired 770Sstevel@tonic-gate * first, followed by the connection lock. 780Sstevel@tonic-gate * 790Sstevel@tonic-gate * All functions doing search in one of these tables increment a reference 800Sstevel@tonic-gate * counter on the connection found (if any). This reference should be dropped 810Sstevel@tonic-gate * when the caller has finished processing the connection. 820Sstevel@tonic-gate * 830Sstevel@tonic-gate * 840Sstevel@tonic-gate * INTERFACES: 850Sstevel@tonic-gate * =========== 860Sstevel@tonic-gate * 870Sstevel@tonic-gate * Connection Lookup: 880Sstevel@tonic-gate * ------------------ 890Sstevel@tonic-gate * 9011042SErik.Nordmark@Sun.COM * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, ira, ip_stack) 9111042SErik.Nordmark@Sun.COM * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, ira, ip_stack) 920Sstevel@tonic-gate * 930Sstevel@tonic-gate * Finds connection for an incoming IPv4 or IPv6 packet. 
Returns NULL if 940Sstevel@tonic-gate * it can't find any associated connection. If the connection is found, its 950Sstevel@tonic-gate * reference counter is incremented. 960Sstevel@tonic-gate * 970Sstevel@tonic-gate * mp: mblock, containing packet header. The full header should fit 980Sstevel@tonic-gate * into a single mblock. It should also contain at least full IP 990Sstevel@tonic-gate * and TCP or UDP header. 1000Sstevel@tonic-gate * 1010Sstevel@tonic-gate * protocol: Either IPPROTO_TCP or IPPROTO_UDP. 1020Sstevel@tonic-gate * 1030Sstevel@tonic-gate * hdr_len: The size of IP header. It is used to find TCP or UDP header in 1040Sstevel@tonic-gate * the packet. 1050Sstevel@tonic-gate * 10611042SErik.Nordmark@Sun.COM * ira->ira_zoneid: The zone in which the returned connection must be; the 10711042SErik.Nordmark@Sun.COM * zoneid corresponding to the ire_zoneid on the IRE located for 10811042SErik.Nordmark@Sun.COM * the packet's destination address. 10911042SErik.Nordmark@Sun.COM * 11011042SErik.Nordmark@Sun.COM * ira->ira_flags: Contains the IRAF_TX_MAC_EXEMPTABLE and 11111042SErik.Nordmark@Sun.COM * IRAF_TX_SHARED_ADDR flags 1120Sstevel@tonic-gate * 1130Sstevel@tonic-gate * For TCP connections, the lookup order is as follows: 1140Sstevel@tonic-gate * 5-tuple {src, dst, protocol, local port, remote port} 1150Sstevel@tonic-gate * lookup in ipcl_conn_fanout table. 1160Sstevel@tonic-gate * 3-tuple {dst, remote port, protocol} lookup in 1170Sstevel@tonic-gate * ipcl_bind_fanout table. 1180Sstevel@tonic-gate * 1190Sstevel@tonic-gate * For UDP connections, a 5-tuple {src, dst, protocol, local port, 1200Sstevel@tonic-gate * remote port} lookup is done on ipcl_udp_fanout. Note that 1210Sstevel@tonic-gate * these interfaces do not handle cases where a packet belongs 1220Sstevel@tonic-gate * to multiple UDP clients, which is handled in IP itself. 
1230Sstevel@tonic-gate * 1241676Sjpk * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must 1251676Sjpk * determine which actual zone gets the segment. This is used only in a 1261676Sjpk * labeled environment. The matching rules are: 1271676Sjpk * 1281676Sjpk * - If it's not a multilevel port, then the label on the packet selects 1291676Sjpk * the zone. Unlabeled packets are delivered to the global zone. 1301676Sjpk * 1311676Sjpk * - If it's a multilevel port, then only the zone registered to receive 1321676Sjpk * packets on that port matches. 1331676Sjpk * 1341676Sjpk * Also, in a labeled environment, packet labels need to be checked. For fully 1351676Sjpk * bound TCP connections, we can assume that the packet label was checked 1361676Sjpk * during connection establishment, and doesn't need to be checked on each 1371676Sjpk * packet. For others, though, we need to check for strict equality or, for 1381676Sjpk * multilevel ports, membership in the range or set. This part currently does 1391676Sjpk * a tnrh lookup on each packet, but could be optimized to use cached results 1401676Sjpk * if that were necessary. (SCTP doesn't come through here, but if it did, 1411676Sjpk * we would apply the same rules as TCP.) 1421676Sjpk * 1431676Sjpk * An implication of the above is that fully-bound TCP sockets must always use 1441676Sjpk * distinct 4-tuples; they can't be discriminated by label alone. 1451676Sjpk * 1461676Sjpk * Note that we cannot trust labels on packets sent to fully-bound UDP sockets, 1471676Sjpk * as there's no connection set-up handshake and no shared state. 1481676Sjpk * 1491676Sjpk * Labels on looped-back packets within a single zone do not need to be 1501676Sjpk * checked, as all processes in the same zone have the same label. 1511676Sjpk * 1521676Sjpk * Finally, for unlabeled packets received by a labeled system, special rules 1531676Sjpk * apply. We consider only the MLP if there is one. 
Otherwise, we prefer a 1541676Sjpk * socket in the zone whose label matches the default label of the sender, if 1551676Sjpk * any. In any event, the receiving socket must have SO_MAC_EXEMPT set and the 1561676Sjpk * receiver's label must dominate the sender's default label. 1571676Sjpk * 15811042SErik.Nordmark@Sun.COM * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcpha_t *, int, ip_stack); 1593448Sdh155122 * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t, 1603448Sdh155122 * ip_stack); 1610Sstevel@tonic-gate * 1620Sstevel@tonic-gate * Lookup routine to find an exact match for {src, dst, local port, 1630Sstevel@tonic-gate * remote port} for TCP connections in ipcl_conn_fanout. The address and 1640Sstevel@tonic-gate * ports are read from the IP and TCP header respectively. 1650Sstevel@tonic-gate * 1663448Sdh155122 * conn_t *ipcl_lookup_listener_v4(lport, laddr, protocol, 1673448Sdh155122 * zoneid, ip_stack); 1683448Sdh155122 * conn_t *ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex, 1693448Sdh155122 * zoneid, ip_stack); 1700Sstevel@tonic-gate * 1710Sstevel@tonic-gate * Lookup routine to find a listener with the tuple {lport, laddr, 1720Sstevel@tonic-gate * protocol} in the ipcl_bind_fanout table. For IPv6, an additional 1730Sstevel@tonic-gate * parameter interface index is also compared. 1740Sstevel@tonic-gate * 1753448Sdh155122 * void ipcl_walk(func, arg, ip_stack) 1760Sstevel@tonic-gate * 1770Sstevel@tonic-gate * Apply 'func' to every connection available. The 'func' is called as 1780Sstevel@tonic-gate * (*func)(connp, arg). The walk is non-atomic so connections may be 1790Sstevel@tonic-gate * created and destroyed during the walk. The CONN_CONDEMNED and 1800Sstevel@tonic-gate * CONN_INCIPIENT flags ensure that connections which are newly created 1810Sstevel@tonic-gate * or being destroyed are not selected by the walker. 
1820Sstevel@tonic-gate * 1830Sstevel@tonic-gate * Table Updates 1840Sstevel@tonic-gate * ------------- 1850Sstevel@tonic-gate * 18611042SErik.Nordmark@Sun.COM * int ipcl_conn_insert(connp); 18711042SErik.Nordmark@Sun.COM * int ipcl_conn_insert_v4(connp); 18811042SErik.Nordmark@Sun.COM * int ipcl_conn_insert_v6(connp); 1890Sstevel@tonic-gate * 1900Sstevel@tonic-gate * Insert 'connp' in the ipcl_conn_fanout. 1910Sstevel@tonic-gate * Arguments : 1920Sstevel@tonic-gate * connp conn_t to be inserted 1930Sstevel@tonic-gate * 1940Sstevel@tonic-gate * Return value : 1950Sstevel@tonic-gate * 0 if connp was inserted 1960Sstevel@tonic-gate * EADDRINUSE if the connection with the same tuple 1970Sstevel@tonic-gate * already exists. 1980Sstevel@tonic-gate * 19911042SErik.Nordmark@Sun.COM * int ipcl_bind_insert(connp); 20011042SErik.Nordmark@Sun.COM * int ipcl_bind_insert_v4(connp); 20111042SErik.Nordmark@Sun.COM * int ipcl_bind_insert_v6(connp); 2020Sstevel@tonic-gate * 2030Sstevel@tonic-gate * Insert 'connp' in ipcl_bind_fanout. 2040Sstevel@tonic-gate * Arguments : 2050Sstevel@tonic-gate * connp conn_t to be inserted 2060Sstevel@tonic-gate * 2070Sstevel@tonic-gate * 2080Sstevel@tonic-gate * void ipcl_hash_remove(connp); 2090Sstevel@tonic-gate * 2100Sstevel@tonic-gate * Removes the 'connp' from the connection fanout table. 2110Sstevel@tonic-gate * 2120Sstevel@tonic-gate * Connection Creation/Destruction 2130Sstevel@tonic-gate * ------------------------------- 2140Sstevel@tonic-gate * 2153448Sdh155122 * conn_t *ipcl_conn_create(type, sleep, netstack_t *) 2160Sstevel@tonic-gate * 2170Sstevel@tonic-gate * Creates a new conn based on the type flag, inserts it into 2180Sstevel@tonic-gate * globalhash table. 2190Sstevel@tonic-gate * 2200Sstevel@tonic-gate * type: This flag determines the type of conn_t which needs to be 2215240Snordmark * created i.e., which kmem_cache it comes from. 
2220Sstevel@tonic-gate * IPCL_TCPCONN indicates a TCP connection 2235240Snordmark * IPCL_SCTPCONN indicates a SCTP connection 2245240Snordmark * IPCL_UDPCONN indicates a UDP conn_t. 2255240Snordmark * IPCL_RAWIPCONN indicates a RAWIP/ICMP conn_t. 2265240Snordmark * IPCL_RTSCONN indicates a RTS conn_t. 2275240Snordmark * IPCL_IPCCONN indicates all other connections. 2280Sstevel@tonic-gate * 2290Sstevel@tonic-gate * void ipcl_conn_destroy(connp) 2300Sstevel@tonic-gate * 2310Sstevel@tonic-gate * Destroys the connection state, removes it from the global 2320Sstevel@tonic-gate * connection hash table and frees its memory. 2330Sstevel@tonic-gate */ 2340Sstevel@tonic-gate 2350Sstevel@tonic-gate #include <sys/types.h> 2360Sstevel@tonic-gate #include <sys/stream.h> 2370Sstevel@tonic-gate #include <sys/stropts.h> 2380Sstevel@tonic-gate #include <sys/sysmacros.h> 2390Sstevel@tonic-gate #include <sys/strsubr.h> 2400Sstevel@tonic-gate #include <sys/strsun.h> 2410Sstevel@tonic-gate #define _SUN_TPI_VERSION 2 2420Sstevel@tonic-gate #include <sys/ddi.h> 2430Sstevel@tonic-gate #include <sys/cmn_err.h> 2440Sstevel@tonic-gate #include <sys/debug.h> 2450Sstevel@tonic-gate 2460Sstevel@tonic-gate #include <sys/systm.h> 2470Sstevel@tonic-gate #include <sys/param.h> 2480Sstevel@tonic-gate #include <sys/kmem.h> 2490Sstevel@tonic-gate #include <sys/isa_defs.h> 2500Sstevel@tonic-gate #include <inet/common.h> 2510Sstevel@tonic-gate #include <netinet/ip6.h> 2520Sstevel@tonic-gate #include <netinet/icmp6.h> 2530Sstevel@tonic-gate 2540Sstevel@tonic-gate #include <inet/ip.h> 25511042SErik.Nordmark@Sun.COM #include <inet/ip_if.h> 25611042SErik.Nordmark@Sun.COM #include <inet/ip_ire.h> 2570Sstevel@tonic-gate #include <inet/ip6.h> 2580Sstevel@tonic-gate #include <inet/ip_ndp.h> 2598348SEric.Yu@Sun.COM #include <inet/ip_impl.h> 260741Smasputra #include <inet/udp_impl.h> 2610Sstevel@tonic-gate #include <inet/sctp_ip.h> 2623448Sdh155122 #include <inet/sctp/sctp_impl.h> 2635240Snordmark #include 
<inet/rawip_impl.h> 2645240Snordmark #include <inet/rts_impl.h> 26510616SSebastien.Roy@Sun.COM #include <inet/iptun/iptun_impl.h> 2660Sstevel@tonic-gate 2670Sstevel@tonic-gate #include <sys/cpuvar.h> 2680Sstevel@tonic-gate 2690Sstevel@tonic-gate #include <inet/ipclassifier.h> 2708348SEric.Yu@Sun.COM #include <inet/tcp.h> 2710Sstevel@tonic-gate #include <inet/ipsec_impl.h> 2720Sstevel@tonic-gate 2731676Sjpk #include <sys/tsol/tnet.h> 2748348SEric.Yu@Sun.COM #include <sys/sockio.h> 2751676Sjpk 2763448Sdh155122 /* Old value for compatibility. Setable in /etc/system */ 2770Sstevel@tonic-gate uint_t tcp_conn_hash_size = 0; 2780Sstevel@tonic-gate 2793448Sdh155122 /* New value. Zero means choose automatically. Setable in /etc/system */ 2800Sstevel@tonic-gate uint_t ipcl_conn_hash_size = 0; 2810Sstevel@tonic-gate uint_t ipcl_conn_hash_memfactor = 8192; 2820Sstevel@tonic-gate uint_t ipcl_conn_hash_maxsize = 82500; 2830Sstevel@tonic-gate 2840Sstevel@tonic-gate /* bind/udp fanout table size */ 2850Sstevel@tonic-gate uint_t ipcl_bind_fanout_size = 512; 2861503Sericheng uint_t ipcl_udp_fanout_size = 16384; 2870Sstevel@tonic-gate 2880Sstevel@tonic-gate /* Raw socket fanout size. Must be a power of 2. */ 2890Sstevel@tonic-gate uint_t ipcl_raw_fanout_size = 256; 2900Sstevel@tonic-gate 2910Sstevel@tonic-gate /* 29210616SSebastien.Roy@Sun.COM * The IPCL_IPTUN_HASH() function works best with a prime table size. We 29310616SSebastien.Roy@Sun.COM * expect that most large deployments would have hundreds of tunnels, and 29410616SSebastien.Roy@Sun.COM * thousands in the extreme case. 29510616SSebastien.Roy@Sun.COM */ 29610616SSebastien.Roy@Sun.COM uint_t ipcl_iptun_fanout_size = 6143; 29710616SSebastien.Roy@Sun.COM 29810616SSebastien.Roy@Sun.COM /* 2990Sstevel@tonic-gate * Power of 2^N Primes useful for hashing for N of 0-28, 3000Sstevel@tonic-gate * these primes are the nearest prime <= 2^N - 2^(N-2). 
3010Sstevel@tonic-gate */ 3020Sstevel@tonic-gate 3030Sstevel@tonic-gate #define P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067, \ 3040Sstevel@tonic-gate 6143, 12281, 24571, 49139, 98299, 196597, 393209, \ 3050Sstevel@tonic-gate 786431, 1572853, 3145721, 6291449, 12582893, 25165813, \ 3060Sstevel@tonic-gate 50331599, 100663291, 201326557, 0} 3070Sstevel@tonic-gate 3080Sstevel@tonic-gate /* 3095240Snordmark * wrapper structure to ensure that conn and what follows it (tcp_t, etc) 3105240Snordmark * are aligned on cache lines. 3110Sstevel@tonic-gate */ 3125240Snordmark typedef union itc_s { 3135240Snordmark conn_t itc_conn; 3145240Snordmark char itcu_filler[CACHE_ALIGN(conn_s)]; 3150Sstevel@tonic-gate } itc_t; 3160Sstevel@tonic-gate 3175240Snordmark struct kmem_cache *tcp_conn_cache; 3185240Snordmark struct kmem_cache *ip_conn_cache; 3190Sstevel@tonic-gate extern struct kmem_cache *sctp_conn_cache; 3200Sstevel@tonic-gate extern struct kmem_cache *tcp_sack_info_cache; 3215240Snordmark struct kmem_cache *udp_conn_cache; 3225240Snordmark struct kmem_cache *rawip_conn_cache; 3235240Snordmark struct kmem_cache *rts_conn_cache; 3240Sstevel@tonic-gate 3250Sstevel@tonic-gate extern void tcp_timermp_free(tcp_t *); 3260Sstevel@tonic-gate extern mblk_t *tcp_timermp_alloc(int); 3270Sstevel@tonic-gate 3285240Snordmark static int ip_conn_constructor(void *, void *, int); 3295240Snordmark static void ip_conn_destructor(void *, void *); 3305240Snordmark 3315240Snordmark static int tcp_conn_constructor(void *, void *, int); 3325240Snordmark static void tcp_conn_destructor(void *, void *); 3335240Snordmark 3345240Snordmark static int udp_conn_constructor(void *, void *, int); 3355240Snordmark static void udp_conn_destructor(void *, void *); 3365240Snordmark 3375240Snordmark static int rawip_conn_constructor(void *, void *, int); 3385240Snordmark static void rawip_conn_destructor(void *, void *); 3395240Snordmark 3405240Snordmark static int rts_conn_constructor(void *, 
void *, int); 3415240Snordmark static void rts_conn_destructor(void *, void *); 3420Sstevel@tonic-gate 3430Sstevel@tonic-gate /* 3443448Sdh155122 * Global (for all stack instances) init routine 3450Sstevel@tonic-gate */ 3460Sstevel@tonic-gate void 3473448Sdh155122 ipcl_g_init(void) 3480Sstevel@tonic-gate { 3495240Snordmark ip_conn_cache = kmem_cache_create("ip_conn_cache", 3500Sstevel@tonic-gate sizeof (conn_t), CACHE_ALIGN_SIZE, 3515240Snordmark ip_conn_constructor, ip_conn_destructor, 3525240Snordmark NULL, NULL, NULL, 0); 3535240Snordmark 3545240Snordmark tcp_conn_cache = kmem_cache_create("tcp_conn_cache", 3555240Snordmark sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE, 3565240Snordmark tcp_conn_constructor, tcp_conn_destructor, 357*11303SKacheong.Poon@Sun.COM tcp_conn_reclaim, NULL, NULL, 0); 3580Sstevel@tonic-gate 3595240Snordmark udp_conn_cache = kmem_cache_create("udp_conn_cache", 3605240Snordmark sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE, 3615240Snordmark udp_conn_constructor, udp_conn_destructor, 3625240Snordmark NULL, NULL, NULL, 0); 3635240Snordmark 3645240Snordmark rawip_conn_cache = kmem_cache_create("rawip_conn_cache", 3655240Snordmark sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE, 3665240Snordmark rawip_conn_constructor, rawip_conn_destructor, 3675240Snordmark NULL, NULL, NULL, 0); 3685240Snordmark 3695240Snordmark rts_conn_cache = kmem_cache_create("rts_conn_cache", 3705240Snordmark sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE, 3715240Snordmark rts_conn_constructor, rts_conn_destructor, 3720Sstevel@tonic-gate NULL, NULL, NULL, 0); 3733448Sdh155122 } 3743448Sdh155122 3753448Sdh155122 /* 3763448Sdh155122 * ipclassifier intialization routine, sets up hash tables. 
3773448Sdh155122 */ 3783448Sdh155122 void 3793448Sdh155122 ipcl_init(ip_stack_t *ipst) 3803448Sdh155122 { 3813448Sdh155122 int i; 3823448Sdh155122 int sizes[] = P2Ps(); 3830Sstevel@tonic-gate 3840Sstevel@tonic-gate /* 3853448Sdh155122 * Calculate size of conn fanout table from /etc/system settings 3860Sstevel@tonic-gate */ 3870Sstevel@tonic-gate if (ipcl_conn_hash_size != 0) { 3883448Sdh155122 ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size; 3890Sstevel@tonic-gate } else if (tcp_conn_hash_size != 0) { 3903448Sdh155122 ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size; 3910Sstevel@tonic-gate } else { 3920Sstevel@tonic-gate extern pgcnt_t freemem; 3930Sstevel@tonic-gate 3943448Sdh155122 ipst->ips_ipcl_conn_fanout_size = 3950Sstevel@tonic-gate (freemem * PAGESIZE) / ipcl_conn_hash_memfactor; 3960Sstevel@tonic-gate 3973448Sdh155122 if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) { 3983448Sdh155122 ipst->ips_ipcl_conn_fanout_size = 3993448Sdh155122 ipcl_conn_hash_maxsize; 4003448Sdh155122 } 4010Sstevel@tonic-gate } 4020Sstevel@tonic-gate 4030Sstevel@tonic-gate for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) { 4043448Sdh155122 if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) { 4050Sstevel@tonic-gate break; 4060Sstevel@tonic-gate } 4070Sstevel@tonic-gate } 4083448Sdh155122 if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) { 4090Sstevel@tonic-gate /* Out of range, use the 2^16 value */ 4103448Sdh155122 ipst->ips_ipcl_conn_fanout_size = sizes[16]; 4110Sstevel@tonic-gate } 4123448Sdh155122 4133448Sdh155122 /* Take values from /etc/system */ 4143448Sdh155122 ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size; 4153448Sdh155122 ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size; 4163448Sdh155122 ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size; 41710616SSebastien.Roy@Sun.COM ipst->ips_ipcl_iptun_fanout_size = ipcl_iptun_fanout_size; 4180Sstevel@tonic-gate 4193448Sdh155122 ASSERT(ipst->ips_ipcl_conn_fanout == NULL); 
4203448Sdh155122 4213448Sdh155122 ipst->ips_ipcl_conn_fanout = kmem_zalloc( 4223448Sdh155122 ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP); 4233448Sdh155122 4243448Sdh155122 for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) { 4253448Sdh155122 mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL, 4260Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4270Sstevel@tonic-gate } 4280Sstevel@tonic-gate 4293448Sdh155122 ipst->ips_ipcl_bind_fanout = kmem_zalloc( 4303448Sdh155122 ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP); 4310Sstevel@tonic-gate 4323448Sdh155122 for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) { 4333448Sdh155122 mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL, 4340Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4350Sstevel@tonic-gate } 4360Sstevel@tonic-gate 43711042SErik.Nordmark@Sun.COM ipst->ips_ipcl_proto_fanout_v4 = kmem_zalloc(IPPROTO_MAX * 4383448Sdh155122 sizeof (connf_t), KM_SLEEP); 4393448Sdh155122 for (i = 0; i < IPPROTO_MAX; i++) { 44011042SErik.Nordmark@Sun.COM mutex_init(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock, NULL, 4410Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4420Sstevel@tonic-gate } 4433448Sdh155122 4443448Sdh155122 ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX * 4453448Sdh155122 sizeof (connf_t), KM_SLEEP); 4463448Sdh155122 for (i = 0; i < IPPROTO_MAX; i++) { 4473448Sdh155122 mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL, 4480Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4490Sstevel@tonic-gate } 4500Sstevel@tonic-gate 4513448Sdh155122 ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP); 4523448Sdh155122 mutex_init(&ipst->ips_rts_clients->connf_lock, 4533448Sdh155122 NULL, MUTEX_DEFAULT, NULL); 4540Sstevel@tonic-gate 4553448Sdh155122 ipst->ips_ipcl_udp_fanout = kmem_zalloc( 4563448Sdh155122 ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP); 4573448Sdh155122 for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) { 4583448Sdh155122 
mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL, 4590Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4600Sstevel@tonic-gate } 4610Sstevel@tonic-gate 46210616SSebastien.Roy@Sun.COM ipst->ips_ipcl_iptun_fanout = kmem_zalloc( 46310616SSebastien.Roy@Sun.COM ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t), KM_SLEEP); 46410616SSebastien.Roy@Sun.COM for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) { 46510616SSebastien.Roy@Sun.COM mutex_init(&ipst->ips_ipcl_iptun_fanout[i].connf_lock, NULL, 46610616SSebastien.Roy@Sun.COM MUTEX_DEFAULT, NULL); 46710616SSebastien.Roy@Sun.COM } 46810616SSebastien.Roy@Sun.COM 4693448Sdh155122 ipst->ips_ipcl_raw_fanout = kmem_zalloc( 4703448Sdh155122 ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP); 4713448Sdh155122 for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { 4723448Sdh155122 mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL, 4730Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4740Sstevel@tonic-gate } 4750Sstevel@tonic-gate 4763448Sdh155122 ipst->ips_ipcl_globalhash_fanout = kmem_zalloc( 4773448Sdh155122 sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP); 4780Sstevel@tonic-gate for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4793448Sdh155122 mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock, 4803448Sdh155122 NULL, MUTEX_DEFAULT, NULL); 4810Sstevel@tonic-gate } 4820Sstevel@tonic-gate } 4830Sstevel@tonic-gate 4840Sstevel@tonic-gate void 4853448Sdh155122 ipcl_g_destroy(void) 4860Sstevel@tonic-gate { 4875240Snordmark kmem_cache_destroy(ip_conn_cache); 4885240Snordmark kmem_cache_destroy(tcp_conn_cache); 4895240Snordmark kmem_cache_destroy(udp_conn_cache); 4905240Snordmark kmem_cache_destroy(rawip_conn_cache); 4915240Snordmark kmem_cache_destroy(rts_conn_cache); 4923448Sdh155122 } 4933448Sdh155122 4943448Sdh155122 /* 4953448Sdh155122 * All user-level and kernel use of the stack must be gone 4963448Sdh155122 * by now. 
4973448Sdh155122 */ 4983448Sdh155122 void 4993448Sdh155122 ipcl_destroy(ip_stack_t *ipst) 5003448Sdh155122 { 5013448Sdh155122 int i; 5023448Sdh155122 5033448Sdh155122 for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) { 5043448Sdh155122 ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL); 5053448Sdh155122 mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock); 5063448Sdh155122 } 5073448Sdh155122 kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size * 5083448Sdh155122 sizeof (connf_t)); 5093448Sdh155122 ipst->ips_ipcl_conn_fanout = NULL; 5103448Sdh155122 5113448Sdh155122 for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) { 5123448Sdh155122 ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL); 5133448Sdh155122 mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock); 5143448Sdh155122 } 5153448Sdh155122 kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size * 5163448Sdh155122 sizeof (connf_t)); 5173448Sdh155122 ipst->ips_ipcl_bind_fanout = NULL; 5183448Sdh155122 5193448Sdh155122 for (i = 0; i < IPPROTO_MAX; i++) { 52011042SErik.Nordmark@Sun.COM ASSERT(ipst->ips_ipcl_proto_fanout_v4[i].connf_head == NULL); 52111042SErik.Nordmark@Sun.COM mutex_destroy(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock); 5223448Sdh155122 } 52311042SErik.Nordmark@Sun.COM kmem_free(ipst->ips_ipcl_proto_fanout_v4, 52411042SErik.Nordmark@Sun.COM IPPROTO_MAX * sizeof (connf_t)); 52511042SErik.Nordmark@Sun.COM ipst->ips_ipcl_proto_fanout_v4 = NULL; 5260Sstevel@tonic-gate 5273448Sdh155122 for (i = 0; i < IPPROTO_MAX; i++) { 5283448Sdh155122 ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL); 5293448Sdh155122 mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock); 5303448Sdh155122 } 5313448Sdh155122 kmem_free(ipst->ips_ipcl_proto_fanout_v6, 5323448Sdh155122 IPPROTO_MAX * sizeof (connf_t)); 5333448Sdh155122 ipst->ips_ipcl_proto_fanout_v6 = NULL; 5343448Sdh155122 5353448Sdh155122 for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) { 
5363448Sdh155122 ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL); 5373448Sdh155122 mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock); 5383448Sdh155122 } 5393448Sdh155122 kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size * 5403448Sdh155122 sizeof (connf_t)); 5413448Sdh155122 ipst->ips_ipcl_udp_fanout = NULL; 5420Sstevel@tonic-gate 54310616SSebastien.Roy@Sun.COM for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) { 54410616SSebastien.Roy@Sun.COM ASSERT(ipst->ips_ipcl_iptun_fanout[i].connf_head == NULL); 54510616SSebastien.Roy@Sun.COM mutex_destroy(&ipst->ips_ipcl_iptun_fanout[i].connf_lock); 54610616SSebastien.Roy@Sun.COM } 54710616SSebastien.Roy@Sun.COM kmem_free(ipst->ips_ipcl_iptun_fanout, 54810616SSebastien.Roy@Sun.COM ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t)); 54910616SSebastien.Roy@Sun.COM ipst->ips_ipcl_iptun_fanout = NULL; 55010616SSebastien.Roy@Sun.COM 5513448Sdh155122 for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { 5523448Sdh155122 ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL); 5533448Sdh155122 mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock); 5543448Sdh155122 } 5553448Sdh155122 kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size * 5563448Sdh155122 sizeof (connf_t)); 5573448Sdh155122 ipst->ips_ipcl_raw_fanout = NULL; 5580Sstevel@tonic-gate 5593448Sdh155122 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5603448Sdh155122 ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL); 5613448Sdh155122 mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 5623448Sdh155122 } 5633448Sdh155122 kmem_free(ipst->ips_ipcl_globalhash_fanout, 5643448Sdh155122 sizeof (connf_t) * CONN_G_HASH_SIZE); 5653448Sdh155122 ipst->ips_ipcl_globalhash_fanout = NULL; 5660Sstevel@tonic-gate 5673448Sdh155122 ASSERT(ipst->ips_rts_clients->connf_head == NULL); 5683448Sdh155122 mutex_destroy(&ipst->ips_rts_clients->connf_lock); 5693448Sdh155122 kmem_free(ipst->ips_rts_clients, sizeof (connf_t)); 
5703448Sdh155122 ipst->ips_rts_clients = NULL; 5710Sstevel@tonic-gate } 5720Sstevel@tonic-gate 5730Sstevel@tonic-gate /* 5740Sstevel@tonic-gate * conn creation routine. initialize the conn, sets the reference 5750Sstevel@tonic-gate * and inserts it in the global hash table. 5760Sstevel@tonic-gate */ 5770Sstevel@tonic-gate conn_t * 5783448Sdh155122 ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns) 5790Sstevel@tonic-gate { 5800Sstevel@tonic-gate conn_t *connp; 5815240Snordmark struct kmem_cache *conn_cache; 5820Sstevel@tonic-gate 5830Sstevel@tonic-gate switch (type) { 5840Sstevel@tonic-gate case IPCL_SCTPCONN: 5850Sstevel@tonic-gate if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL) 5860Sstevel@tonic-gate return (NULL); 5874691Skcpoon sctp_conn_init(connp); 5883448Sdh155122 netstack_hold(ns); 5893448Sdh155122 connp->conn_netstack = ns; 59011042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_ipst = ns->netstack_ip; 59111042SErik.Nordmark@Sun.COM ipcl_globalhash_insert(connp); 5925240Snordmark return (connp); 5935240Snordmark 5945240Snordmark case IPCL_TCPCONN: 5955240Snordmark conn_cache = tcp_conn_cache; 5960Sstevel@tonic-gate break; 5975240Snordmark 5985240Snordmark case IPCL_UDPCONN: 5995240Snordmark conn_cache = udp_conn_cache; 6005240Snordmark break; 6015240Snordmark 6025240Snordmark case IPCL_RAWIPCONN: 6035240Snordmark conn_cache = rawip_conn_cache; 6045240Snordmark break; 6055240Snordmark 6065240Snordmark case IPCL_RTSCONN: 6075240Snordmark conn_cache = rts_conn_cache; 6085240Snordmark break; 6095240Snordmark 6100Sstevel@tonic-gate case IPCL_IPCCONN: 6115240Snordmark conn_cache = ip_conn_cache; 6120Sstevel@tonic-gate break; 6135240Snordmark 614741Smasputra default: 615741Smasputra connp = NULL; 616741Smasputra ASSERT(0); 6170Sstevel@tonic-gate } 6180Sstevel@tonic-gate 6195240Snordmark if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL) 6205240Snordmark return (NULL); 6215240Snordmark 6225240Snordmark connp->conn_ref = 1; 
6235240Snordmark netstack_hold(ns); 6245240Snordmark connp->conn_netstack = ns; 62511042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_ipst = ns->netstack_ip; 6265240Snordmark ipcl_globalhash_insert(connp); 6270Sstevel@tonic-gate return (connp); 6280Sstevel@tonic-gate } 6290Sstevel@tonic-gate 6300Sstevel@tonic-gate void 6310Sstevel@tonic-gate ipcl_conn_destroy(conn_t *connp) 6320Sstevel@tonic-gate { 6330Sstevel@tonic-gate mblk_t *mp; 6343448Sdh155122 netstack_t *ns = connp->conn_netstack; 6350Sstevel@tonic-gate 6360Sstevel@tonic-gate ASSERT(!MUTEX_HELD(&connp->conn_lock)); 6370Sstevel@tonic-gate ASSERT(connp->conn_ref == 0); 6380Sstevel@tonic-gate 6397502Saruna@cs.umn.edu DTRACE_PROBE1(conn__destroy, conn_t *, connp); 6407502Saruna@cs.umn.edu 6411676Sjpk if (connp->conn_cred != NULL) { 6421676Sjpk crfree(connp->conn_cred); 6431676Sjpk connp->conn_cred = NULL; 6441676Sjpk } 6451676Sjpk 64611042SErik.Nordmark@Sun.COM if (connp->conn_ht_iphc != NULL) { 64711042SErik.Nordmark@Sun.COM kmem_free(connp->conn_ht_iphc, connp->conn_ht_iphc_allocated); 64811042SErik.Nordmark@Sun.COM connp->conn_ht_iphc = NULL; 64911042SErik.Nordmark@Sun.COM connp->conn_ht_iphc_allocated = 0; 65011042SErik.Nordmark@Sun.COM connp->conn_ht_iphc_len = 0; 65111042SErik.Nordmark@Sun.COM connp->conn_ht_ulp = NULL; 65211042SErik.Nordmark@Sun.COM connp->conn_ht_ulp_len = 0; 65311042SErik.Nordmark@Sun.COM } 65411042SErik.Nordmark@Sun.COM ip_pkt_free(&connp->conn_xmit_ipp); 65511042SErik.Nordmark@Sun.COM 6560Sstevel@tonic-gate ipcl_globalhash_remove(connp); 6570Sstevel@tonic-gate 65811042SErik.Nordmark@Sun.COM if (connp->conn_latch != NULL) { 65911042SErik.Nordmark@Sun.COM IPLATCH_REFRELE(connp->conn_latch); 66011042SErik.Nordmark@Sun.COM connp->conn_latch = NULL; 66111042SErik.Nordmark@Sun.COM } 66211042SErik.Nordmark@Sun.COM if (connp->conn_latch_in_policy != NULL) { 66311042SErik.Nordmark@Sun.COM IPPOL_REFRELE(connp->conn_latch_in_policy); 66411042SErik.Nordmark@Sun.COM connp->conn_latch_in_policy = 
NULL; 66511042SErik.Nordmark@Sun.COM } 66611042SErik.Nordmark@Sun.COM if (connp->conn_latch_in_action != NULL) { 66711042SErik.Nordmark@Sun.COM IPACT_REFRELE(connp->conn_latch_in_action); 66811042SErik.Nordmark@Sun.COM connp->conn_latch_in_action = NULL; 66911042SErik.Nordmark@Sun.COM } 67011042SErik.Nordmark@Sun.COM if (connp->conn_policy != NULL) { 67111042SErik.Nordmark@Sun.COM IPPH_REFRELE(connp->conn_policy, ns); 67211042SErik.Nordmark@Sun.COM connp->conn_policy = NULL; 67311042SErik.Nordmark@Sun.COM } 6743448Sdh155122 67511042SErik.Nordmark@Sun.COM if (connp->conn_ipsec_opt_mp != NULL) { 67611042SErik.Nordmark@Sun.COM freemsg(connp->conn_ipsec_opt_mp); 67711042SErik.Nordmark@Sun.COM connp->conn_ipsec_opt_mp = NULL; 67811042SErik.Nordmark@Sun.COM } 67911042SErik.Nordmark@Sun.COM 68011042SErik.Nordmark@Sun.COM if (connp->conn_flags & IPCL_TCPCONN) { 68111042SErik.Nordmark@Sun.COM tcp_t *tcp = connp->conn_tcp; 682741Smasputra 6830Sstevel@tonic-gate tcp_free(tcp); 6840Sstevel@tonic-gate mp = tcp->tcp_timercache; 68511042SErik.Nordmark@Sun.COM 68611042SErik.Nordmark@Sun.COM tcp->tcp_tcps = NULL; 6870Sstevel@tonic-gate 6880Sstevel@tonic-gate if (tcp->tcp_sack_info != NULL) { 6890Sstevel@tonic-gate bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t)); 6900Sstevel@tonic-gate kmem_cache_free(tcp_sack_info_cache, 6910Sstevel@tonic-gate tcp->tcp_sack_info); 6920Sstevel@tonic-gate } 6930Sstevel@tonic-gate 6948014SKacheong.Poon@Sun.COM /* 6958014SKacheong.Poon@Sun.COM * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate 6968014SKacheong.Poon@Sun.COM * the mblk. 
6978014SKacheong.Poon@Sun.COM */ 6988014SKacheong.Poon@Sun.COM if (tcp->tcp_rsrv_mp != NULL) { 6998014SKacheong.Poon@Sun.COM freeb(tcp->tcp_rsrv_mp); 7008014SKacheong.Poon@Sun.COM tcp->tcp_rsrv_mp = NULL; 7018014SKacheong.Poon@Sun.COM mutex_destroy(&tcp->tcp_rsrv_mp_lock); 7028014SKacheong.Poon@Sun.COM } 7038014SKacheong.Poon@Sun.COM 70411042SErik.Nordmark@Sun.COM ipcl_conn_cleanup(connp); 70511042SErik.Nordmark@Sun.COM connp->conn_flags = IPCL_TCPCONN; 7063448Sdh155122 if (ns != NULL) { 7073448Sdh155122 ASSERT(tcp->tcp_tcps == NULL); 7083448Sdh155122 connp->conn_netstack = NULL; 70911042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_ipst = NULL; 7103448Sdh155122 netstack_rele(ns); 7113448Sdh155122 } 7125240Snordmark 7135240Snordmark bzero(tcp, sizeof (tcp_t)); 7145240Snordmark 7155240Snordmark tcp->tcp_timercache = mp; 7165240Snordmark tcp->tcp_connp = connp; 7175240Snordmark kmem_cache_free(tcp_conn_cache, connp); 7185240Snordmark return; 7195240Snordmark } 7205240Snordmark 7215240Snordmark if (connp->conn_flags & IPCL_SCTPCONN) { 7223448Sdh155122 ASSERT(ns != NULL); 7230Sstevel@tonic-gate sctp_free(connp); 7245240Snordmark return; 7255240Snordmark } 7265240Snordmark 72711042SErik.Nordmark@Sun.COM ipcl_conn_cleanup(connp); 7285240Snordmark if (ns != NULL) { 7295240Snordmark connp->conn_netstack = NULL; 73011042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_ipst = NULL; 7315240Snordmark netstack_rele(ns); 7325240Snordmark } 7338348SEric.Yu@Sun.COM 7345240Snordmark /* leave conn_priv aka conn_udp, conn_icmp, etc in place. 
*/ 7355240Snordmark if (connp->conn_flags & IPCL_UDPCONN) { 7365240Snordmark connp->conn_flags = IPCL_UDPCONN; 7375240Snordmark kmem_cache_free(udp_conn_cache, connp); 7385240Snordmark } else if (connp->conn_flags & IPCL_RAWIPCONN) { 7395240Snordmark connp->conn_flags = IPCL_RAWIPCONN; 74011042SErik.Nordmark@Sun.COM connp->conn_proto = IPPROTO_ICMP; 74111042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_protocol = connp->conn_proto; 7425240Snordmark kmem_cache_free(rawip_conn_cache, connp); 7435240Snordmark } else if (connp->conn_flags & IPCL_RTSCONN) { 7445240Snordmark connp->conn_flags = IPCL_RTSCONN; 7455240Snordmark kmem_cache_free(rts_conn_cache, connp); 7460Sstevel@tonic-gate } else { 7475240Snordmark connp->conn_flags = IPCL_IPCCONN; 7485240Snordmark ASSERT(connp->conn_flags & IPCL_IPCCONN); 7495240Snordmark ASSERT(connp->conn_priv == NULL); 7505240Snordmark kmem_cache_free(ip_conn_cache, connp); 7510Sstevel@tonic-gate } 7520Sstevel@tonic-gate } 7530Sstevel@tonic-gate 7540Sstevel@tonic-gate /* 7550Sstevel@tonic-gate * Running in cluster mode - deregister listener information 7560Sstevel@tonic-gate */ 7570Sstevel@tonic-gate static void 7580Sstevel@tonic-gate ipcl_conn_unlisten(conn_t *connp) 7590Sstevel@tonic-gate { 7600Sstevel@tonic-gate ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0); 7610Sstevel@tonic-gate ASSERT(connp->conn_lport != 0); 7620Sstevel@tonic-gate 7630Sstevel@tonic-gate if (cl_inet_unlisten != NULL) { 7640Sstevel@tonic-gate sa_family_t addr_family; 7650Sstevel@tonic-gate uint8_t *laddrp; 7660Sstevel@tonic-gate 76711042SErik.Nordmark@Sun.COM if (connp->conn_ipversion == IPV6_VERSION) { 7680Sstevel@tonic-gate addr_family = AF_INET6; 76911042SErik.Nordmark@Sun.COM laddrp = (uint8_t *)&connp->conn_bound_addr_v6; 7700Sstevel@tonic-gate } else { 7710Sstevel@tonic-gate addr_family = AF_INET; 77211042SErik.Nordmark@Sun.COM laddrp = (uint8_t *)&connp->conn_bound_addr_v4; 7730Sstevel@tonic-gate } 7748392SHuafeng.Lv@Sun.COM 
(*cl_inet_unlisten)(connp->conn_netstack->netstack_stackid, 7758392SHuafeng.Lv@Sun.COM IPPROTO_TCP, addr_family, laddrp, connp->conn_lport, NULL); 7760Sstevel@tonic-gate } 7770Sstevel@tonic-gate connp->conn_flags &= ~IPCL_CL_LISTENER; 7780Sstevel@tonic-gate } 7790Sstevel@tonic-gate 7800Sstevel@tonic-gate /* 7810Sstevel@tonic-gate * We set the IPCL_REMOVED flag (instead of clearing the flag indicating 7820Sstevel@tonic-gate * which table the conn belonged to). So for debugging we can see which hash 7830Sstevel@tonic-gate * table this connection was in. 7840Sstevel@tonic-gate */ 7850Sstevel@tonic-gate #define IPCL_HASH_REMOVE(connp) { \ 7860Sstevel@tonic-gate connf_t *connfp = (connp)->conn_fanout; \ 7870Sstevel@tonic-gate ASSERT(!MUTEX_HELD(&((connp)->conn_lock))); \ 7880Sstevel@tonic-gate if (connfp != NULL) { \ 7890Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); \ 7900Sstevel@tonic-gate if ((connp)->conn_next != NULL) \ 7910Sstevel@tonic-gate (connp)->conn_next->conn_prev = \ 7920Sstevel@tonic-gate (connp)->conn_prev; \ 7930Sstevel@tonic-gate if ((connp)->conn_prev != NULL) \ 7940Sstevel@tonic-gate (connp)->conn_prev->conn_next = \ 7950Sstevel@tonic-gate (connp)->conn_next; \ 7960Sstevel@tonic-gate else \ 7970Sstevel@tonic-gate connfp->connf_head = (connp)->conn_next; \ 7980Sstevel@tonic-gate (connp)->conn_fanout = NULL; \ 7990Sstevel@tonic-gate (connp)->conn_next = NULL; \ 8000Sstevel@tonic-gate (connp)->conn_prev = NULL; \ 8010Sstevel@tonic-gate (connp)->conn_flags |= IPCL_REMOVED; \ 8020Sstevel@tonic-gate if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0) \ 8030Sstevel@tonic-gate ipcl_conn_unlisten((connp)); \ 8040Sstevel@tonic-gate CONN_DEC_REF((connp)); \ 8050Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); \ 8060Sstevel@tonic-gate } \ 8070Sstevel@tonic-gate } 8080Sstevel@tonic-gate 8090Sstevel@tonic-gate void 8100Sstevel@tonic-gate ipcl_hash_remove(conn_t *connp) 8110Sstevel@tonic-gate { 81211042SErik.Nordmark@Sun.COM uint8_t protocol = 
connp->conn_proto; 81311042SErik.Nordmark@Sun.COM 8140Sstevel@tonic-gate IPCL_HASH_REMOVE(connp); 81511042SErik.Nordmark@Sun.COM if (protocol == IPPROTO_RSVP) 81611042SErik.Nordmark@Sun.COM ill_set_inputfn_all(connp->conn_netstack->netstack_ip); 8170Sstevel@tonic-gate } 8180Sstevel@tonic-gate 8190Sstevel@tonic-gate /* 8200Sstevel@tonic-gate * The whole purpose of this function is allow removal of 8210Sstevel@tonic-gate * a conn_t from the connected hash for timewait reclaim. 8220Sstevel@tonic-gate * This is essentially a TW reclaim fastpath where timewait 8230Sstevel@tonic-gate * collector checks under fanout lock (so no one else can 8240Sstevel@tonic-gate * get access to the conn_t) that refcnt is 2 i.e. one for 8250Sstevel@tonic-gate * TCP and one for the classifier hash list. If ref count 8260Sstevel@tonic-gate * is indeed 2, we can just remove the conn under lock and 8270Sstevel@tonic-gate * avoid cleaning up the conn under squeue. This gives us 8280Sstevel@tonic-gate * improved performance. 
8290Sstevel@tonic-gate */ 8300Sstevel@tonic-gate void 8310Sstevel@tonic-gate ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp) 8320Sstevel@tonic-gate { 8330Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connfp->connf_lock)); 8340Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock)); 8350Sstevel@tonic-gate ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0); 8360Sstevel@tonic-gate 8370Sstevel@tonic-gate if ((connp)->conn_next != NULL) { 8384691Skcpoon (connp)->conn_next->conn_prev = (connp)->conn_prev; 8390Sstevel@tonic-gate } 8400Sstevel@tonic-gate if ((connp)->conn_prev != NULL) { 8414691Skcpoon (connp)->conn_prev->conn_next = (connp)->conn_next; 8420Sstevel@tonic-gate } else { 8430Sstevel@tonic-gate connfp->connf_head = (connp)->conn_next; 8440Sstevel@tonic-gate } 8450Sstevel@tonic-gate (connp)->conn_fanout = NULL; 8460Sstevel@tonic-gate (connp)->conn_next = NULL; 8470Sstevel@tonic-gate (connp)->conn_prev = NULL; 8480Sstevel@tonic-gate (connp)->conn_flags |= IPCL_REMOVED; 8490Sstevel@tonic-gate ASSERT((connp)->conn_ref == 2); 8500Sstevel@tonic-gate (connp)->conn_ref--; 8510Sstevel@tonic-gate } 8520Sstevel@tonic-gate 8530Sstevel@tonic-gate #define IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) { \ 8540Sstevel@tonic-gate ASSERT((connp)->conn_fanout == NULL); \ 8550Sstevel@tonic-gate ASSERT((connp)->conn_next == NULL); \ 8560Sstevel@tonic-gate ASSERT((connp)->conn_prev == NULL); \ 8570Sstevel@tonic-gate if ((connfp)->connf_head != NULL) { \ 8580Sstevel@tonic-gate (connfp)->connf_head->conn_prev = (connp); \ 8590Sstevel@tonic-gate (connp)->conn_next = (connfp)->connf_head; \ 8600Sstevel@tonic-gate } \ 8610Sstevel@tonic-gate (connp)->conn_fanout = (connfp); \ 8620Sstevel@tonic-gate (connfp)->connf_head = (connp); \ 8630Sstevel@tonic-gate (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 8640Sstevel@tonic-gate IPCL_CONNECTED; \ 8650Sstevel@tonic-gate CONN_INC_REF(connp); \ 8660Sstevel@tonic-gate } 8670Sstevel@tonic-gate 8680Sstevel@tonic-gate #define 
IPCL_HASH_INSERT_CONNECTED(connfp, connp) { \ 8690Sstevel@tonic-gate IPCL_HASH_REMOVE((connp)); \ 8700Sstevel@tonic-gate mutex_enter(&(connfp)->connf_lock); \ 8710Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); \ 8720Sstevel@tonic-gate mutex_exit(&(connfp)->connf_lock); \ 8730Sstevel@tonic-gate } 8740Sstevel@tonic-gate 8750Sstevel@tonic-gate #define IPCL_HASH_INSERT_BOUND(connfp, connp) { \ 8760Sstevel@tonic-gate conn_t *pconnp = NULL, *nconnp; \ 8770Sstevel@tonic-gate IPCL_HASH_REMOVE((connp)); \ 8780Sstevel@tonic-gate mutex_enter(&(connfp)->connf_lock); \ 8790Sstevel@tonic-gate nconnp = (connfp)->connf_head; \ 880153Sethindra while (nconnp != NULL && \ 88111042SErik.Nordmark@Sun.COM !_IPCL_V4_MATCH_ANY(nconnp->conn_laddr_v6)) { \ 882153Sethindra pconnp = nconnp; \ 883153Sethindra nconnp = nconnp->conn_next; \ 8840Sstevel@tonic-gate } \ 8850Sstevel@tonic-gate if (pconnp != NULL) { \ 8860Sstevel@tonic-gate pconnp->conn_next = (connp); \ 8870Sstevel@tonic-gate (connp)->conn_prev = pconnp; \ 8880Sstevel@tonic-gate } else { \ 8890Sstevel@tonic-gate (connfp)->connf_head = (connp); \ 8900Sstevel@tonic-gate } \ 8910Sstevel@tonic-gate if (nconnp != NULL) { \ 8920Sstevel@tonic-gate (connp)->conn_next = nconnp; \ 8930Sstevel@tonic-gate nconnp->conn_prev = (connp); \ 8940Sstevel@tonic-gate } \ 8950Sstevel@tonic-gate (connp)->conn_fanout = (connfp); \ 8960Sstevel@tonic-gate (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 8970Sstevel@tonic-gate IPCL_BOUND; \ 8980Sstevel@tonic-gate CONN_INC_REF(connp); \ 8990Sstevel@tonic-gate mutex_exit(&(connfp)->connf_lock); \ 9000Sstevel@tonic-gate } 9010Sstevel@tonic-gate 9020Sstevel@tonic-gate #define IPCL_HASH_INSERT_WILDCARD(connfp, connp) { \ 9030Sstevel@tonic-gate conn_t **list, *prev, *next; \ 9040Sstevel@tonic-gate boolean_t isv4mapped = \ 90511042SErik.Nordmark@Sun.COM IN6_IS_ADDR_V4MAPPED(&(connp)->conn_laddr_v6); \ 9060Sstevel@tonic-gate IPCL_HASH_REMOVE((connp)); \ 9070Sstevel@tonic-gate 
mutex_enter(&(connfp)->connf_lock); \ 9080Sstevel@tonic-gate list = &(connfp)->connf_head; \ 9090Sstevel@tonic-gate prev = NULL; \ 9100Sstevel@tonic-gate while ((next = *list) != NULL) { \ 9110Sstevel@tonic-gate if (isv4mapped && \ 91211042SErik.Nordmark@Sun.COM IN6_IS_ADDR_UNSPECIFIED(&next->conn_laddr_v6) && \ 9130Sstevel@tonic-gate connp->conn_zoneid == next->conn_zoneid) { \ 9140Sstevel@tonic-gate (connp)->conn_next = next; \ 9150Sstevel@tonic-gate if (prev != NULL) \ 9160Sstevel@tonic-gate prev = next->conn_prev; \ 9170Sstevel@tonic-gate next->conn_prev = (connp); \ 9180Sstevel@tonic-gate break; \ 9190Sstevel@tonic-gate } \ 9200Sstevel@tonic-gate list = &next->conn_next; \ 9210Sstevel@tonic-gate prev = next; \ 9220Sstevel@tonic-gate } \ 9230Sstevel@tonic-gate (connp)->conn_prev = prev; \ 9240Sstevel@tonic-gate *list = (connp); \ 9250Sstevel@tonic-gate (connp)->conn_fanout = (connfp); \ 9260Sstevel@tonic-gate (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 9270Sstevel@tonic-gate IPCL_BOUND; \ 9280Sstevel@tonic-gate CONN_INC_REF((connp)); \ 9290Sstevel@tonic-gate mutex_exit(&(connfp)->connf_lock); \ 9300Sstevel@tonic-gate } 9310Sstevel@tonic-gate 9320Sstevel@tonic-gate void 9330Sstevel@tonic-gate ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp) 9340Sstevel@tonic-gate { 9350Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 9360Sstevel@tonic-gate } 9370Sstevel@tonic-gate 9380Sstevel@tonic-gate /* 93910616SSebastien.Roy@Sun.COM * Because the classifier is used to classify inbound packets, the destination 94010616SSebastien.Roy@Sun.COM * address is meant to be our local tunnel address (tunnel source), and the 94110616SSebastien.Roy@Sun.COM * source the remote tunnel address (tunnel destination). 94211042SErik.Nordmark@Sun.COM * 94311042SErik.Nordmark@Sun.COM * Note that conn_proto can't be used for fanout since the upper protocol 94411042SErik.Nordmark@Sun.COM * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel. 
94510616SSebastien.Roy@Sun.COM */ 94610616SSebastien.Roy@Sun.COM conn_t * 94710616SSebastien.Roy@Sun.COM ipcl_iptun_classify_v4(ipaddr_t *src, ipaddr_t *dst, ip_stack_t *ipst) 94810616SSebastien.Roy@Sun.COM { 94910616SSebastien.Roy@Sun.COM connf_t *connfp; 95010616SSebastien.Roy@Sun.COM conn_t *connp; 95110616SSebastien.Roy@Sun.COM 95210616SSebastien.Roy@Sun.COM /* first look for IPv4 tunnel links */ 95310616SSebastien.Roy@Sun.COM connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, *src)]; 95410616SSebastien.Roy@Sun.COM mutex_enter(&connfp->connf_lock); 95510616SSebastien.Roy@Sun.COM for (connp = connfp->connf_head; connp != NULL; 95610616SSebastien.Roy@Sun.COM connp = connp->conn_next) { 95710616SSebastien.Roy@Sun.COM if (IPCL_IPTUN_MATCH(connp, *dst, *src)) 95810616SSebastien.Roy@Sun.COM break; 95910616SSebastien.Roy@Sun.COM } 96010616SSebastien.Roy@Sun.COM if (connp != NULL) 96110616SSebastien.Roy@Sun.COM goto done; 96210616SSebastien.Roy@Sun.COM 96310616SSebastien.Roy@Sun.COM mutex_exit(&connfp->connf_lock); 96410616SSebastien.Roy@Sun.COM 96510616SSebastien.Roy@Sun.COM /* We didn't find an IPv4 tunnel, try a 6to4 tunnel */ 96610616SSebastien.Roy@Sun.COM connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, 96710616SSebastien.Roy@Sun.COM INADDR_ANY)]; 96810616SSebastien.Roy@Sun.COM mutex_enter(&connfp->connf_lock); 96910616SSebastien.Roy@Sun.COM for (connp = connfp->connf_head; connp != NULL; 97010616SSebastien.Roy@Sun.COM connp = connp->conn_next) { 97110616SSebastien.Roy@Sun.COM if (IPCL_IPTUN_MATCH(connp, *dst, INADDR_ANY)) 97210616SSebastien.Roy@Sun.COM break; 97310616SSebastien.Roy@Sun.COM } 97410616SSebastien.Roy@Sun.COM done: 97510616SSebastien.Roy@Sun.COM if (connp != NULL) 97610616SSebastien.Roy@Sun.COM CONN_INC_REF(connp); 97710616SSebastien.Roy@Sun.COM mutex_exit(&connfp->connf_lock); 97810616SSebastien.Roy@Sun.COM return (connp); 97910616SSebastien.Roy@Sun.COM } 98010616SSebastien.Roy@Sun.COM 98110616SSebastien.Roy@Sun.COM conn_t * 
98210616SSebastien.Roy@Sun.COM ipcl_iptun_classify_v6(in6_addr_t *src, in6_addr_t *dst, ip_stack_t *ipst) 98310616SSebastien.Roy@Sun.COM { 98410616SSebastien.Roy@Sun.COM connf_t *connfp; 98510616SSebastien.Roy@Sun.COM conn_t *connp; 98610616SSebastien.Roy@Sun.COM 98710616SSebastien.Roy@Sun.COM /* Look for an IPv6 tunnel link */ 98810616SSebastien.Roy@Sun.COM connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(dst, src)]; 98910616SSebastien.Roy@Sun.COM mutex_enter(&connfp->connf_lock); 99010616SSebastien.Roy@Sun.COM for (connp = connfp->connf_head; connp != NULL; 99110616SSebastien.Roy@Sun.COM connp = connp->conn_next) { 99210616SSebastien.Roy@Sun.COM if (IPCL_IPTUN_MATCH_V6(connp, dst, src)) { 99310616SSebastien.Roy@Sun.COM CONN_INC_REF(connp); 99410616SSebastien.Roy@Sun.COM break; 99510616SSebastien.Roy@Sun.COM } 99610616SSebastien.Roy@Sun.COM } 99710616SSebastien.Roy@Sun.COM mutex_exit(&connfp->connf_lock); 99810616SSebastien.Roy@Sun.COM return (connp); 99910616SSebastien.Roy@Sun.COM } 100010616SSebastien.Roy@Sun.COM 100110616SSebastien.Roy@Sun.COM /* 10020Sstevel@tonic-gate * This function is used only for inserting SCTP raw socket now. 10030Sstevel@tonic-gate * This may change later. 10040Sstevel@tonic-gate * 10050Sstevel@tonic-gate * Note that only one raw socket can be bound to a port. The param 10060Sstevel@tonic-gate * lport is in network byte order. 10070Sstevel@tonic-gate */ 10080Sstevel@tonic-gate static int 10090Sstevel@tonic-gate ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport) 10100Sstevel@tonic-gate { 10110Sstevel@tonic-gate connf_t *connfp; 10120Sstevel@tonic-gate conn_t *oconnp; 10133448Sdh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 10140Sstevel@tonic-gate 10153448Sdh155122 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)]; 10160Sstevel@tonic-gate 10170Sstevel@tonic-gate /* Check for existing raw socket already bound to the port. 
*/ 10180Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 10190Sstevel@tonic-gate for (oconnp = connfp->connf_head; oconnp != NULL; 1020409Skcpoon oconnp = oconnp->conn_next) { 10210Sstevel@tonic-gate if (oconnp->conn_lport == lport && 10220Sstevel@tonic-gate oconnp->conn_zoneid == connp->conn_zoneid && 102311042SErik.Nordmark@Sun.COM oconnp->conn_family == connp->conn_family && 102411042SErik.Nordmark@Sun.COM ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) || 102511042SErik.Nordmark@Sun.COM IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_laddr_v6) || 102611042SErik.Nordmark@Sun.COM IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6) || 102711042SErik.Nordmark@Sun.COM IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_laddr_v6)) || 102811042SErik.Nordmark@Sun.COM IN6_ARE_ADDR_EQUAL(&oconnp->conn_laddr_v6, 102911042SErik.Nordmark@Sun.COM &connp->conn_laddr_v6))) { 10300Sstevel@tonic-gate break; 10310Sstevel@tonic-gate } 10320Sstevel@tonic-gate } 10330Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 10340Sstevel@tonic-gate if (oconnp != NULL) 10350Sstevel@tonic-gate return (EADDRNOTAVAIL); 10360Sstevel@tonic-gate 103711042SErik.Nordmark@Sun.COM if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) || 103811042SErik.Nordmark@Sun.COM IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) { 103911042SErik.Nordmark@Sun.COM if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) || 104011042SErik.Nordmark@Sun.COM IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6)) { 10410Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 10420Sstevel@tonic-gate } else { 10430Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 10440Sstevel@tonic-gate } 10450Sstevel@tonic-gate } else { 10460Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 10470Sstevel@tonic-gate } 10480Sstevel@tonic-gate return (0); 10490Sstevel@tonic-gate } 10500Sstevel@tonic-gate 105110616SSebastien.Roy@Sun.COM static int 105211042SErik.Nordmark@Sun.COM ipcl_iptun_hash_insert(conn_t *connp, ip_stack_t *ipst) 
105310616SSebastien.Roy@Sun.COM { 105410616SSebastien.Roy@Sun.COM connf_t *connfp; 105510616SSebastien.Roy@Sun.COM conn_t *tconnp; 105611042SErik.Nordmark@Sun.COM ipaddr_t laddr = connp->conn_laddr_v4; 105711042SErik.Nordmark@Sun.COM ipaddr_t faddr = connp->conn_faddr_v4; 105810616SSebastien.Roy@Sun.COM 105911042SErik.Nordmark@Sun.COM connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(laddr, faddr)]; 106010616SSebastien.Roy@Sun.COM mutex_enter(&connfp->connf_lock); 106110616SSebastien.Roy@Sun.COM for (tconnp = connfp->connf_head; tconnp != NULL; 106210616SSebastien.Roy@Sun.COM tconnp = tconnp->conn_next) { 106311042SErik.Nordmark@Sun.COM if (IPCL_IPTUN_MATCH(tconnp, laddr, faddr)) { 106410616SSebastien.Roy@Sun.COM /* A tunnel is already bound to these addresses. */ 106510616SSebastien.Roy@Sun.COM mutex_exit(&connfp->connf_lock); 106610616SSebastien.Roy@Sun.COM return (EADDRINUSE); 106710616SSebastien.Roy@Sun.COM } 106810616SSebastien.Roy@Sun.COM } 106910616SSebastien.Roy@Sun.COM IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 107010616SSebastien.Roy@Sun.COM mutex_exit(&connfp->connf_lock); 107110616SSebastien.Roy@Sun.COM return (0); 107210616SSebastien.Roy@Sun.COM } 107310616SSebastien.Roy@Sun.COM 107410616SSebastien.Roy@Sun.COM static int 107511042SErik.Nordmark@Sun.COM ipcl_iptun_hash_insert_v6(conn_t *connp, ip_stack_t *ipst) 107610616SSebastien.Roy@Sun.COM { 107710616SSebastien.Roy@Sun.COM connf_t *connfp; 107810616SSebastien.Roy@Sun.COM conn_t *tconnp; 107911042SErik.Nordmark@Sun.COM in6_addr_t *laddr = &connp->conn_laddr_v6; 108011042SErik.Nordmark@Sun.COM in6_addr_t *faddr = &connp->conn_faddr_v6; 108110616SSebastien.Roy@Sun.COM 108211042SErik.Nordmark@Sun.COM connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(laddr, faddr)]; 108310616SSebastien.Roy@Sun.COM mutex_enter(&connfp->connf_lock); 108410616SSebastien.Roy@Sun.COM for (tconnp = connfp->connf_head; tconnp != NULL; 108510616SSebastien.Roy@Sun.COM tconnp = tconnp->conn_next) { 
108611042SErik.Nordmark@Sun.COM if (IPCL_IPTUN_MATCH_V6(tconnp, laddr, faddr)) { 108710616SSebastien.Roy@Sun.COM /* A tunnel is already bound to these addresses. */ 108810616SSebastien.Roy@Sun.COM mutex_exit(&connfp->connf_lock); 108910616SSebastien.Roy@Sun.COM return (EADDRINUSE); 109010616SSebastien.Roy@Sun.COM } 109110616SSebastien.Roy@Sun.COM } 109210616SSebastien.Roy@Sun.COM IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 109310616SSebastien.Roy@Sun.COM mutex_exit(&connfp->connf_lock); 109410616SSebastien.Roy@Sun.COM return (0); 109510616SSebastien.Roy@Sun.COM } 109610616SSebastien.Roy@Sun.COM 10970Sstevel@tonic-gate /* 10981676Sjpk * Check for a MAC exemption conflict on a labeled system. Note that for 10991676Sjpk * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the 11001676Sjpk * transport layer. This check is for binding all other protocols. 11011676Sjpk * 11021676Sjpk * Returns true if there's a conflict. 11031676Sjpk */ 11041676Sjpk static boolean_t 11053448Sdh155122 check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst) 11061676Sjpk { 11071676Sjpk connf_t *connfp; 11081676Sjpk conn_t *tconn; 11091676Sjpk 111011042SErik.Nordmark@Sun.COM connfp = &ipst->ips_ipcl_proto_fanout_v4[connp->conn_proto]; 11111676Sjpk mutex_enter(&connfp->connf_lock); 11121676Sjpk for (tconn = connfp->connf_head; tconn != NULL; 11131676Sjpk tconn = tconn->conn_next) { 11141676Sjpk /* We don't allow v4 fallback for v6 raw socket */ 111511042SErik.Nordmark@Sun.COM if (connp->conn_family != tconn->conn_family) 11161676Sjpk continue; 11171676Sjpk /* If neither is exempt, then there's no conflict */ 111810934Ssommerfeld@sun.com if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) && 111910934Ssommerfeld@sun.com (tconn->conn_mac_mode == CONN_MAC_DEFAULT)) 11201676Sjpk continue; 11219710SKen.Powell@Sun.COM /* We are only concerned about sockets for a different zone */ 11229710SKen.Powell@Sun.COM if (connp->conn_zoneid == tconn->conn_zoneid) 
11239710SKen.Powell@Sun.COM continue; 11241676Sjpk /* If both are bound to different specific addrs, ok */ 112511042SErik.Nordmark@Sun.COM if (connp->conn_laddr_v4 != INADDR_ANY && 112611042SErik.Nordmark@Sun.COM tconn->conn_laddr_v4 != INADDR_ANY && 112711042SErik.Nordmark@Sun.COM connp->conn_laddr_v4 != tconn->conn_laddr_v4) 11281676Sjpk continue; 11291676Sjpk /* These two conflict; fail */ 11301676Sjpk break; 11311676Sjpk } 11321676Sjpk mutex_exit(&connfp->connf_lock); 11331676Sjpk return (tconn != NULL); 11341676Sjpk } 11351676Sjpk 11361676Sjpk static boolean_t 11373448Sdh155122 check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst) 11381676Sjpk { 11391676Sjpk connf_t *connfp; 11401676Sjpk conn_t *tconn; 11411676Sjpk 114211042SErik.Nordmark@Sun.COM connfp = &ipst->ips_ipcl_proto_fanout_v6[connp->conn_proto]; 11431676Sjpk mutex_enter(&connfp->connf_lock); 11441676Sjpk for (tconn = connfp->connf_head; tconn != NULL; 11451676Sjpk tconn = tconn->conn_next) { 11461676Sjpk /* We don't allow v4 fallback for v6 raw socket */ 114711042SErik.Nordmark@Sun.COM if (connp->conn_family != tconn->conn_family) 11481676Sjpk continue; 11491676Sjpk /* If neither is exempt, then there's no conflict */ 115010934Ssommerfeld@sun.com if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) && 115110934Ssommerfeld@sun.com (tconn->conn_mac_mode == CONN_MAC_DEFAULT)) 11521676Sjpk continue; 11539710SKen.Powell@Sun.COM /* We are only concerned about sockets for a different zone */ 11549710SKen.Powell@Sun.COM if (connp->conn_zoneid == tconn->conn_zoneid) 11559710SKen.Powell@Sun.COM continue; 11561676Sjpk /* If both are bound to different addrs, ok */ 115711042SErik.Nordmark@Sun.COM if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) && 115811042SErik.Nordmark@Sun.COM !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_laddr_v6) && 115911042SErik.Nordmark@Sun.COM !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6, 116011042SErik.Nordmark@Sun.COM &tconn->conn_laddr_v6)) 11611676Sjpk continue; 11621676Sjpk /* These two 
conflict; fail */ 11631676Sjpk break; 11641676Sjpk } 11651676Sjpk mutex_exit(&connfp->connf_lock); 11661676Sjpk return (tconn != NULL); 11671676Sjpk } 11681676Sjpk 11691676Sjpk /* 11700Sstevel@tonic-gate * (v4, v6) bind hash insertion routines 117111042SErik.Nordmark@Sun.COM * The caller has already setup the conn (conn_proto, conn_laddr_v6, conn_lport) 11720Sstevel@tonic-gate */ 117311042SErik.Nordmark@Sun.COM 11740Sstevel@tonic-gate int 117511042SErik.Nordmark@Sun.COM ipcl_bind_insert(conn_t *connp) 117611042SErik.Nordmark@Sun.COM { 117711042SErik.Nordmark@Sun.COM if (connp->conn_ipversion == IPV6_VERSION) 117811042SErik.Nordmark@Sun.COM return (ipcl_bind_insert_v6(connp)); 117911042SErik.Nordmark@Sun.COM else 118011042SErik.Nordmark@Sun.COM return (ipcl_bind_insert_v4(connp)); 118111042SErik.Nordmark@Sun.COM } 118211042SErik.Nordmark@Sun.COM 118311042SErik.Nordmark@Sun.COM int 118411042SErik.Nordmark@Sun.COM ipcl_bind_insert_v4(conn_t *connp) 11850Sstevel@tonic-gate { 11860Sstevel@tonic-gate connf_t *connfp; 11870Sstevel@tonic-gate int ret = 0; 11883448Sdh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 118911042SErik.Nordmark@Sun.COM uint16_t lport = connp->conn_lport; 119011042SErik.Nordmark@Sun.COM uint8_t protocol = connp->conn_proto; 11910Sstevel@tonic-gate 119210616SSebastien.Roy@Sun.COM if (IPCL_IS_IPTUN(connp)) 119311042SErik.Nordmark@Sun.COM return (ipcl_iptun_hash_insert(connp, ipst)); 119410616SSebastien.Roy@Sun.COM 11950Sstevel@tonic-gate switch (protocol) { 11961676Sjpk default: 11973448Sdh155122 if (is_system_labeled() && 11983448Sdh155122 check_exempt_conflict_v4(connp, ipst)) 11991676Sjpk return (EADDRINUSE); 12001676Sjpk /* FALLTHROUGH */ 12010Sstevel@tonic-gate case IPPROTO_UDP: 12020Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 12033448Sdh155122 connfp = &ipst->ips_ipcl_udp_fanout[ 12043448Sdh155122 IPCL_UDP_HASH(lport, ipst)]; 12050Sstevel@tonic-gate } else { 120611042SErik.Nordmark@Sun.COM connfp = 
&ipst->ips_ipcl_proto_fanout_v4[protocol]; 12070Sstevel@tonic-gate } 12080Sstevel@tonic-gate 120911042SErik.Nordmark@Sun.COM if (connp->conn_faddr_v4 != INADDR_ANY) { 12100Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 121111042SErik.Nordmark@Sun.COM } else if (connp->conn_laddr_v4 != INADDR_ANY) { 12120Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 12130Sstevel@tonic-gate } else { 12140Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 12150Sstevel@tonic-gate } 121611042SErik.Nordmark@Sun.COM if (protocol == IPPROTO_RSVP) 121711042SErik.Nordmark@Sun.COM ill_set_inputfn_all(ipst); 12180Sstevel@tonic-gate break; 12190Sstevel@tonic-gate 12200Sstevel@tonic-gate case IPPROTO_TCP: 12210Sstevel@tonic-gate /* Insert it in the Bind Hash */ 12221676Sjpk ASSERT(connp->conn_zoneid != ALL_ZONES); 12233448Sdh155122 connfp = &ipst->ips_ipcl_bind_fanout[ 12243448Sdh155122 IPCL_BIND_HASH(lport, ipst)]; 122511042SErik.Nordmark@Sun.COM if (connp->conn_laddr_v4 != INADDR_ANY) { 12260Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 12270Sstevel@tonic-gate } else { 12280Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 12290Sstevel@tonic-gate } 12300Sstevel@tonic-gate if (cl_inet_listen != NULL) { 123111042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ipversion == IPV4_VERSION); 12320Sstevel@tonic-gate connp->conn_flags |= IPCL_CL_LISTENER; 12338392SHuafeng.Lv@Sun.COM (*cl_inet_listen)( 12348392SHuafeng.Lv@Sun.COM connp->conn_netstack->netstack_stackid, 12358392SHuafeng.Lv@Sun.COM IPPROTO_TCP, AF_INET, 123611042SErik.Nordmark@Sun.COM (uint8_t *)&connp->conn_bound_addr_v4, lport, NULL); 12370Sstevel@tonic-gate } 12380Sstevel@tonic-gate break; 12390Sstevel@tonic-gate 12400Sstevel@tonic-gate case IPPROTO_SCTP: 12410Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 12420Sstevel@tonic-gate break; 12430Sstevel@tonic-gate } 12440Sstevel@tonic-gate 12450Sstevel@tonic-gate return (ret); 12460Sstevel@tonic-gate } 
12470Sstevel@tonic-gate 12480Sstevel@tonic-gate int 124911042SErik.Nordmark@Sun.COM ipcl_bind_insert_v6(conn_t *connp) 12500Sstevel@tonic-gate { 125110616SSebastien.Roy@Sun.COM connf_t *connfp; 125210616SSebastien.Roy@Sun.COM int ret = 0; 12533448Sdh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 125411042SErik.Nordmark@Sun.COM uint16_t lport = connp->conn_lport; 125511042SErik.Nordmark@Sun.COM uint8_t protocol = connp->conn_proto; 12560Sstevel@tonic-gate 125710616SSebastien.Roy@Sun.COM if (IPCL_IS_IPTUN(connp)) { 125811042SErik.Nordmark@Sun.COM return (ipcl_iptun_hash_insert_v6(connp, ipst)); 125910616SSebastien.Roy@Sun.COM } 126010616SSebastien.Roy@Sun.COM 12610Sstevel@tonic-gate switch (protocol) { 12621676Sjpk default: 12633448Sdh155122 if (is_system_labeled() && 12643448Sdh155122 check_exempt_conflict_v6(connp, ipst)) 12651676Sjpk return (EADDRINUSE); 12661676Sjpk /* FALLTHROUGH */ 12670Sstevel@tonic-gate case IPPROTO_UDP: 12680Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 12693448Sdh155122 connfp = &ipst->ips_ipcl_udp_fanout[ 12703448Sdh155122 IPCL_UDP_HASH(lport, ipst)]; 12710Sstevel@tonic-gate } else { 12723448Sdh155122 connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; 12730Sstevel@tonic-gate } 12740Sstevel@tonic-gate 127511042SErik.Nordmark@Sun.COM if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) { 12760Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 127711042SErik.Nordmark@Sun.COM } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) { 12780Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 12790Sstevel@tonic-gate } else { 12800Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 12810Sstevel@tonic-gate } 12820Sstevel@tonic-gate break; 12830Sstevel@tonic-gate 12840Sstevel@tonic-gate case IPPROTO_TCP: 12850Sstevel@tonic-gate /* Insert it in the Bind Hash */ 12861676Sjpk ASSERT(connp->conn_zoneid != ALL_ZONES); 12873448Sdh155122 connfp = &ipst->ips_ipcl_bind_fanout[ 12883448Sdh155122 
IPCL_BIND_HASH(lport, ipst)]; 128911042SErik.Nordmark@Sun.COM if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) { 12900Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 12910Sstevel@tonic-gate } else { 12920Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 12930Sstevel@tonic-gate } 12940Sstevel@tonic-gate if (cl_inet_listen != NULL) { 12950Sstevel@tonic-gate sa_family_t addr_family; 12960Sstevel@tonic-gate uint8_t *laddrp; 12970Sstevel@tonic-gate 129811042SErik.Nordmark@Sun.COM if (connp->conn_ipversion == IPV6_VERSION) { 12990Sstevel@tonic-gate addr_family = AF_INET6; 13000Sstevel@tonic-gate laddrp = 130111042SErik.Nordmark@Sun.COM (uint8_t *)&connp->conn_bound_addr_v6; 13020Sstevel@tonic-gate } else { 13030Sstevel@tonic-gate addr_family = AF_INET; 130411042SErik.Nordmark@Sun.COM laddrp = (uint8_t *)&connp->conn_bound_addr_v4; 13050Sstevel@tonic-gate } 13060Sstevel@tonic-gate connp->conn_flags |= IPCL_CL_LISTENER; 13078392SHuafeng.Lv@Sun.COM (*cl_inet_listen)( 13088392SHuafeng.Lv@Sun.COM connp->conn_netstack->netstack_stackid, 13098392SHuafeng.Lv@Sun.COM IPPROTO_TCP, addr_family, laddrp, lport, NULL); 13100Sstevel@tonic-gate } 13110Sstevel@tonic-gate break; 13120Sstevel@tonic-gate 13130Sstevel@tonic-gate case IPPROTO_SCTP: 13140Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 13150Sstevel@tonic-gate break; 13160Sstevel@tonic-gate } 13170Sstevel@tonic-gate 13180Sstevel@tonic-gate return (ret); 13190Sstevel@tonic-gate } 13200Sstevel@tonic-gate 13210Sstevel@tonic-gate /* 13220Sstevel@tonic-gate * ipcl_conn_hash insertion routines. 132311042SErik.Nordmark@Sun.COM * The caller has already set conn_proto and the addresses/ports in the conn_t. 
13240Sstevel@tonic-gate */ 132511042SErik.Nordmark@Sun.COM 13260Sstevel@tonic-gate int 132711042SErik.Nordmark@Sun.COM ipcl_conn_insert(conn_t *connp) 132811042SErik.Nordmark@Sun.COM { 132911042SErik.Nordmark@Sun.COM if (connp->conn_ipversion == IPV6_VERSION) 133011042SErik.Nordmark@Sun.COM return (ipcl_conn_insert_v6(connp)); 133111042SErik.Nordmark@Sun.COM else 133211042SErik.Nordmark@Sun.COM return (ipcl_conn_insert_v4(connp)); 133311042SErik.Nordmark@Sun.COM } 133411042SErik.Nordmark@Sun.COM 133511042SErik.Nordmark@Sun.COM int 133611042SErik.Nordmark@Sun.COM ipcl_conn_insert_v4(conn_t *connp) 13370Sstevel@tonic-gate { 13380Sstevel@tonic-gate connf_t *connfp; 13390Sstevel@tonic-gate conn_t *tconnp; 13400Sstevel@tonic-gate int ret = 0; 13413448Sdh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 134211042SErik.Nordmark@Sun.COM uint16_t lport = connp->conn_lport; 134311042SErik.Nordmark@Sun.COM uint8_t protocol = connp->conn_proto; 13440Sstevel@tonic-gate 134510616SSebastien.Roy@Sun.COM if (IPCL_IS_IPTUN(connp)) 134611042SErik.Nordmark@Sun.COM return (ipcl_iptun_hash_insert(connp, ipst)); 134710616SSebastien.Roy@Sun.COM 13480Sstevel@tonic-gate switch (protocol) { 13490Sstevel@tonic-gate case IPPROTO_TCP: 13508432SJonathan.Anderson@Sun.COM /* 135111042SErik.Nordmark@Sun.COM * For TCP, we check whether the connection tuple already 13528432SJonathan.Anderson@Sun.COM * exists before allowing the connection to proceed. We 13538432SJonathan.Anderson@Sun.COM * also allow indexing on the zoneid. This is to allow 13548432SJonathan.Anderson@Sun.COM * multiple shared stack zones to have the same tcp 13558432SJonathan.Anderson@Sun.COM * connection tuple. In practice this only happens for 13568432SJonathan.Anderson@Sun.COM * INADDR_LOOPBACK as it's the only local address which 13578432SJonathan.Anderson@Sun.COM * doesn't have to be unique. 
13588432SJonathan.Anderson@Sun.COM */ 13593448Sdh155122 connfp = &ipst->ips_ipcl_conn_fanout[ 136011042SErik.Nordmark@Sun.COM IPCL_CONN_HASH(connp->conn_faddr_v4, 13613448Sdh155122 connp->conn_ports, ipst)]; 13620Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 13630Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 13640Sstevel@tonic-gate tconnp = tconnp->conn_next) { 136511042SErik.Nordmark@Sun.COM if (IPCL_CONN_MATCH(tconnp, connp->conn_proto, 136611042SErik.Nordmark@Sun.COM connp->conn_faddr_v4, connp->conn_laddr_v4, 136711042SErik.Nordmark@Sun.COM connp->conn_ports) && 136811042SErik.Nordmark@Sun.COM IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) { 13690Sstevel@tonic-gate /* Already have a conn. bail out */ 13700Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 13710Sstevel@tonic-gate return (EADDRINUSE); 13720Sstevel@tonic-gate } 13730Sstevel@tonic-gate } 13740Sstevel@tonic-gate if (connp->conn_fanout != NULL) { 13750Sstevel@tonic-gate /* 13760Sstevel@tonic-gate * Probably a XTI/TLI application trying to do a 13770Sstevel@tonic-gate * rebind. Let it happen. 13780Sstevel@tonic-gate */ 13790Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 13800Sstevel@tonic-gate IPCL_HASH_REMOVE(connp); 13810Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 13820Sstevel@tonic-gate } 13833104Sjprakash 13843104Sjprakash ASSERT(connp->conn_recv != NULL); 138511042SErik.Nordmark@Sun.COM ASSERT(connp->conn_recvicmp != NULL); 13863104Sjprakash 13870Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 13880Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 13890Sstevel@tonic-gate break; 13900Sstevel@tonic-gate 13910Sstevel@tonic-gate case IPPROTO_SCTP: 1392409Skcpoon /* 1393409Skcpoon * The raw socket may have already been bound, remove it 1394409Skcpoon * from the hash first. 
1395409Skcpoon */ 1396409Skcpoon IPCL_HASH_REMOVE(connp); 13970Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 13980Sstevel@tonic-gate break; 13990Sstevel@tonic-gate 14001676Sjpk default: 14011676Sjpk /* 14021676Sjpk * Check for conflicts among MAC exempt bindings. For 14031676Sjpk * transports with port numbers, this is done by the upper 14041676Sjpk * level per-transport binding logic. For all others, it's 14051676Sjpk * done here. 14061676Sjpk */ 14073448Sdh155122 if (is_system_labeled() && 14083448Sdh155122 check_exempt_conflict_v4(connp, ipst)) 14091676Sjpk return (EADDRINUSE); 14101676Sjpk /* FALLTHROUGH */ 14111676Sjpk 14120Sstevel@tonic-gate case IPPROTO_UDP: 14130Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 14143448Sdh155122 connfp = &ipst->ips_ipcl_udp_fanout[ 141511042SErik.Nordmark@Sun.COM IPCL_UDP_HASH(lport, ipst)]; 14160Sstevel@tonic-gate } else { 141711042SErik.Nordmark@Sun.COM connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol]; 14180Sstevel@tonic-gate } 14190Sstevel@tonic-gate 142011042SErik.Nordmark@Sun.COM if (connp->conn_faddr_v4 != INADDR_ANY) { 14210Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 142211042SErik.Nordmark@Sun.COM } else if (connp->conn_laddr_v4 != INADDR_ANY) { 14230Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 14240Sstevel@tonic-gate } else { 14250Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 14260Sstevel@tonic-gate } 14270Sstevel@tonic-gate break; 14280Sstevel@tonic-gate } 14290Sstevel@tonic-gate 14300Sstevel@tonic-gate return (ret); 14310Sstevel@tonic-gate } 14320Sstevel@tonic-gate 14330Sstevel@tonic-gate int 143411042SErik.Nordmark@Sun.COM ipcl_conn_insert_v6(conn_t *connp) 14350Sstevel@tonic-gate { 14360Sstevel@tonic-gate connf_t *connfp; 14370Sstevel@tonic-gate conn_t *tconnp; 14380Sstevel@tonic-gate int ret = 0; 14393448Sdh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 144011042SErik.Nordmark@Sun.COM uint16_t lport = connp->conn_lport; 
144111042SErik.Nordmark@Sun.COM uint8_t protocol = connp->conn_proto; 144211042SErik.Nordmark@Sun.COM uint_t ifindex = connp->conn_bound_if; 14430Sstevel@tonic-gate 144410616SSebastien.Roy@Sun.COM if (IPCL_IS_IPTUN(connp)) 144511042SErik.Nordmark@Sun.COM return (ipcl_iptun_hash_insert_v6(connp, ipst)); 144610616SSebastien.Roy@Sun.COM 14470Sstevel@tonic-gate switch (protocol) { 14480Sstevel@tonic-gate case IPPROTO_TCP: 14498432SJonathan.Anderson@Sun.COM 14508432SJonathan.Anderson@Sun.COM /* 14518432SJonathan.Anderson@Sun.COM * For tcp, we check whether the connection tuple already 14528432SJonathan.Anderson@Sun.COM * exists before allowing the connection to proceed. We 14538432SJonathan.Anderson@Sun.COM * also allow indexing on the zoneid. This is to allow 14548432SJonathan.Anderson@Sun.COM * multiple shared stack zones to have the same tcp 14558432SJonathan.Anderson@Sun.COM * connection tuple. In practice this only happens for 14568432SJonathan.Anderson@Sun.COM * ipv6_loopback as it's the only local address which 14578432SJonathan.Anderson@Sun.COM * doesn't have to be unique. 14588432SJonathan.Anderson@Sun.COM */ 14593448Sdh155122 connfp = &ipst->ips_ipcl_conn_fanout[ 146011042SErik.Nordmark@Sun.COM IPCL_CONN_HASH_V6(connp->conn_faddr_v6, connp->conn_ports, 14613448Sdh155122 ipst)]; 14620Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 14630Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 14640Sstevel@tonic-gate tconnp = tconnp->conn_next) { 146511042SErik.Nordmark@Sun.COM /* NOTE: need to match zoneid. 
Bug in onnv-gate */ 146611042SErik.Nordmark@Sun.COM if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_proto, 146711042SErik.Nordmark@Sun.COM connp->conn_faddr_v6, connp->conn_laddr_v6, 14680Sstevel@tonic-gate connp->conn_ports) && 146911042SErik.Nordmark@Sun.COM (tconnp->conn_bound_if == 0 || 147011042SErik.Nordmark@Sun.COM tconnp->conn_bound_if == ifindex) && 147111042SErik.Nordmark@Sun.COM IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) { 14720Sstevel@tonic-gate /* Already have a conn. bail out */ 14730Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 14740Sstevel@tonic-gate return (EADDRINUSE); 14750Sstevel@tonic-gate } 14760Sstevel@tonic-gate } 14770Sstevel@tonic-gate if (connp->conn_fanout != NULL) { 14780Sstevel@tonic-gate /* 14790Sstevel@tonic-gate * Probably a XTI/TLI application trying to do a 14800Sstevel@tonic-gate * rebind. Let it happen. 14810Sstevel@tonic-gate */ 14820Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 14830Sstevel@tonic-gate IPCL_HASH_REMOVE(connp); 14840Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 14850Sstevel@tonic-gate } 14860Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 14870Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 14880Sstevel@tonic-gate break; 14890Sstevel@tonic-gate 14900Sstevel@tonic-gate case IPPROTO_SCTP: 1491409Skcpoon IPCL_HASH_REMOVE(connp); 14920Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 14930Sstevel@tonic-gate break; 14940Sstevel@tonic-gate 14951676Sjpk default: 14963448Sdh155122 if (is_system_labeled() && 14973448Sdh155122 check_exempt_conflict_v6(connp, ipst)) 14981676Sjpk return (EADDRINUSE); 14991676Sjpk /* FALLTHROUGH */ 15000Sstevel@tonic-gate case IPPROTO_UDP: 15010Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 15023448Sdh155122 connfp = &ipst->ips_ipcl_udp_fanout[ 150311042SErik.Nordmark@Sun.COM IPCL_UDP_HASH(lport, ipst)]; 15040Sstevel@tonic-gate } else { 15053448Sdh155122 connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; 15060Sstevel@tonic-gate } 
15070Sstevel@tonic-gate 150811042SErik.Nordmark@Sun.COM if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) { 15090Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 151011042SErik.Nordmark@Sun.COM } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) { 15110Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 15120Sstevel@tonic-gate } else { 15130Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 15140Sstevel@tonic-gate } 15150Sstevel@tonic-gate break; 15160Sstevel@tonic-gate } 15170Sstevel@tonic-gate 15180Sstevel@tonic-gate return (ret); 15190Sstevel@tonic-gate } 15200Sstevel@tonic-gate 15210Sstevel@tonic-gate /* 15220Sstevel@tonic-gate * v4 packet classifying function. looks up the fanout table to 15230Sstevel@tonic-gate * find the conn, the packet belongs to. returns the conn with 15240Sstevel@tonic-gate * the reference held, null otherwise. 15251676Sjpk * 15261676Sjpk * If zoneid is ALL_ZONES, then the search rules described in the "Connection 15271676Sjpk * Lookup" comment block are applied. Labels are also checked as described 15281676Sjpk * above. If the packet is from the inside (looped back), and is from the same 15291676Sjpk * zone, then label checks are omitted. 
15300Sstevel@tonic-gate */ 15310Sstevel@tonic-gate conn_t * 153211042SErik.Nordmark@Sun.COM ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, 153311042SErik.Nordmark@Sun.COM ip_recv_attr_t *ira, ip_stack_t *ipst) 15340Sstevel@tonic-gate { 15350Sstevel@tonic-gate ipha_t *ipha; 15360Sstevel@tonic-gate connf_t *connfp, *bind_connfp; 15370Sstevel@tonic-gate uint16_t lport; 15380Sstevel@tonic-gate uint16_t fport; 15390Sstevel@tonic-gate uint32_t ports; 15400Sstevel@tonic-gate conn_t *connp; 15410Sstevel@tonic-gate uint16_t *up; 154211042SErik.Nordmark@Sun.COM zoneid_t zoneid = ira->ira_zoneid; 15430Sstevel@tonic-gate 15440Sstevel@tonic-gate ipha = (ipha_t *)mp->b_rptr; 15450Sstevel@tonic-gate up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET); 15460Sstevel@tonic-gate 15470Sstevel@tonic-gate switch (protocol) { 15480Sstevel@tonic-gate case IPPROTO_TCP: 15490Sstevel@tonic-gate ports = *(uint32_t *)up; 15500Sstevel@tonic-gate connfp = 15513448Sdh155122 &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src, 15523448Sdh155122 ports, ipst)]; 15530Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 15540Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 15550Sstevel@tonic-gate connp = connp->conn_next) { 155611042SErik.Nordmark@Sun.COM if (IPCL_CONN_MATCH(connp, protocol, 155711042SErik.Nordmark@Sun.COM ipha->ipha_src, ipha->ipha_dst, ports) && 155811042SErik.Nordmark@Sun.COM (connp->conn_zoneid == zoneid || 155911042SErik.Nordmark@Sun.COM connp->conn_allzones || 156011042SErik.Nordmark@Sun.COM ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 156111042SErik.Nordmark@Sun.COM (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) && 156211042SErik.Nordmark@Sun.COM (ira->ira_flags & IRAF_TX_SHARED_ADDR)))) 15630Sstevel@tonic-gate break; 15640Sstevel@tonic-gate } 15650Sstevel@tonic-gate 15660Sstevel@tonic-gate if (connp != NULL) { 15671676Sjpk /* 15681676Sjpk * We have a fully-bound TCP connection. 
15691676Sjpk * 15701676Sjpk * For labeled systems, there's no need to check the 15711676Sjpk * label here. It's known to be good as we checked 15721676Sjpk * before allowing the connection to become bound. 15731676Sjpk */ 15740Sstevel@tonic-gate CONN_INC_REF(connp); 15750Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 15760Sstevel@tonic-gate return (connp); 15770Sstevel@tonic-gate } 15780Sstevel@tonic-gate 15790Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 15800Sstevel@tonic-gate lport = up[1]; 15813448Sdh155122 bind_connfp = 15823448Sdh155122 &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 15830Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 15840Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 15850Sstevel@tonic-gate connp = connp->conn_next) { 15861676Sjpk if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst, 158711042SErik.Nordmark@Sun.COM lport) && 158811042SErik.Nordmark@Sun.COM (connp->conn_zoneid == zoneid || 158911042SErik.Nordmark@Sun.COM connp->conn_allzones || 159011042SErik.Nordmark@Sun.COM ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 159111042SErik.Nordmark@Sun.COM (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) && 159211042SErik.Nordmark@Sun.COM (ira->ira_flags & IRAF_TX_SHARED_ADDR)))) 15930Sstevel@tonic-gate break; 15940Sstevel@tonic-gate } 15950Sstevel@tonic-gate 15961676Sjpk /* 15971676Sjpk * If the matching connection is SLP on a private address, then 15981676Sjpk * the label on the packet must match the local zone's label. 15991676Sjpk * Otherwise, it must be in the label range defined by tnrh. 160011042SErik.Nordmark@Sun.COM * This is ensured by tsol_receive_local. 160111042SErik.Nordmark@Sun.COM * 160211042SErik.Nordmark@Sun.COM * Note that we don't check tsol_receive_local for 160311042SErik.Nordmark@Sun.COM * the connected case. 
16041676Sjpk */ 160511042SErik.Nordmark@Sun.COM if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) && 16061676Sjpk !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION, 160711042SErik.Nordmark@Sun.COM ira, connp)) { 160811042SErik.Nordmark@Sun.COM DTRACE_PROBE3(tx__ip__log__info__classify__tcp, 160911042SErik.Nordmark@Sun.COM char *, "connp(1) could not receive mp(2)", 161011042SErik.Nordmark@Sun.COM conn_t *, connp, mblk_t *, mp); 16111676Sjpk connp = NULL; 16121676Sjpk } 16131676Sjpk 16140Sstevel@tonic-gate if (connp != NULL) { 16151676Sjpk /* Have a listener at least */ 16160Sstevel@tonic-gate CONN_INC_REF(connp); 16170Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 16180Sstevel@tonic-gate return (connp); 16190Sstevel@tonic-gate } 16200Sstevel@tonic-gate 16210Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 16220Sstevel@tonic-gate break; 16230Sstevel@tonic-gate 16240Sstevel@tonic-gate case IPPROTO_UDP: 16250Sstevel@tonic-gate lport = up[1]; 16260Sstevel@tonic-gate fport = up[0]; 16273448Sdh155122 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; 16280Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 16290Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 16300Sstevel@tonic-gate connp = connp->conn_next) { 16310Sstevel@tonic-gate if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst, 16320Sstevel@tonic-gate fport, ipha->ipha_src) && 163311042SErik.Nordmark@Sun.COM (connp->conn_zoneid == zoneid || 163411042SErik.Nordmark@Sun.COM connp->conn_allzones || 163511042SErik.Nordmark@Sun.COM ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 163611042SErik.Nordmark@Sun.COM (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE)))) 16370Sstevel@tonic-gate break; 16380Sstevel@tonic-gate } 16390Sstevel@tonic-gate 164011042SErik.Nordmark@Sun.COM if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) && 16411676Sjpk !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION, 164211042SErik.Nordmark@Sun.COM ira, connp)) { 16431676Sjpk 
DTRACE_PROBE3(tx__ip__log__info__classify__udp, 16441676Sjpk char *, "connp(1) could not receive mp(2)", 16451676Sjpk conn_t *, connp, mblk_t *, mp); 16461676Sjpk connp = NULL; 16471676Sjpk } 16481676Sjpk 16490Sstevel@tonic-gate if (connp != NULL) { 16500Sstevel@tonic-gate CONN_INC_REF(connp); 16510Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 16520Sstevel@tonic-gate return (connp); 16530Sstevel@tonic-gate } 16540Sstevel@tonic-gate 16550Sstevel@tonic-gate /* 16560Sstevel@tonic-gate * We shouldn't come here for multicast/broadcast packets 16570Sstevel@tonic-gate */ 16580Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 165911042SErik.Nordmark@Sun.COM 16600Sstevel@tonic-gate break; 166110616SSebastien.Roy@Sun.COM 166210616SSebastien.Roy@Sun.COM case IPPROTO_ENCAP: 166310616SSebastien.Roy@Sun.COM case IPPROTO_IPV6: 166410616SSebastien.Roy@Sun.COM return (ipcl_iptun_classify_v4(&ipha->ipha_src, 166510616SSebastien.Roy@Sun.COM &ipha->ipha_dst, ipst)); 16660Sstevel@tonic-gate } 16670Sstevel@tonic-gate 16680Sstevel@tonic-gate return (NULL); 16690Sstevel@tonic-gate } 16700Sstevel@tonic-gate 16710Sstevel@tonic-gate conn_t * 167211042SErik.Nordmark@Sun.COM ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, 167311042SErik.Nordmark@Sun.COM ip_recv_attr_t *ira, ip_stack_t *ipst) 16740Sstevel@tonic-gate { 16750Sstevel@tonic-gate ip6_t *ip6h; 16760Sstevel@tonic-gate connf_t *connfp, *bind_connfp; 16770Sstevel@tonic-gate uint16_t lport; 16780Sstevel@tonic-gate uint16_t fport; 167911042SErik.Nordmark@Sun.COM tcpha_t *tcpha; 16800Sstevel@tonic-gate uint32_t ports; 16810Sstevel@tonic-gate conn_t *connp; 16820Sstevel@tonic-gate uint16_t *up; 168311042SErik.Nordmark@Sun.COM zoneid_t zoneid = ira->ira_zoneid; 16840Sstevel@tonic-gate 16850Sstevel@tonic-gate ip6h = (ip6_t *)mp->b_rptr; 16860Sstevel@tonic-gate 16870Sstevel@tonic-gate switch (protocol) { 16880Sstevel@tonic-gate case IPPROTO_TCP: 168911042SErik.Nordmark@Sun.COM tcpha = (tcpha_t *)&mp->b_rptr[hdr_len]; 
169011042SErik.Nordmark@Sun.COM up = &tcpha->tha_lport; 16910Sstevel@tonic-gate ports = *(uint32_t *)up; 16920Sstevel@tonic-gate 16930Sstevel@tonic-gate connfp = 16943448Sdh155122 &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src, 16953448Sdh155122 ports, ipst)]; 16960Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 16970Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 16980Sstevel@tonic-gate connp = connp->conn_next) { 169911042SErik.Nordmark@Sun.COM if (IPCL_CONN_MATCH_V6(connp, protocol, 170011042SErik.Nordmark@Sun.COM ip6h->ip6_src, ip6h->ip6_dst, ports) && 170111042SErik.Nordmark@Sun.COM (connp->conn_zoneid == zoneid || 170211042SErik.Nordmark@Sun.COM connp->conn_allzones || 170311042SErik.Nordmark@Sun.COM ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 170411042SErik.Nordmark@Sun.COM (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) && 170511042SErik.Nordmark@Sun.COM (ira->ira_flags & IRAF_TX_SHARED_ADDR)))) 17060Sstevel@tonic-gate break; 17070Sstevel@tonic-gate } 17080Sstevel@tonic-gate 17090Sstevel@tonic-gate if (connp != NULL) { 17101676Sjpk /* 17111676Sjpk * We have a fully-bound TCP connection. 17121676Sjpk * 17131676Sjpk * For labeled systems, there's no need to check the 17141676Sjpk * label here. It's known to be good as we checked 17151676Sjpk * before allowing the connection to become bound. 
17161676Sjpk */ 17170Sstevel@tonic-gate CONN_INC_REF(connp); 17180Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 17190Sstevel@tonic-gate return (connp); 17200Sstevel@tonic-gate } 17210Sstevel@tonic-gate 17220Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 17230Sstevel@tonic-gate 17240Sstevel@tonic-gate lport = up[1]; 17253448Sdh155122 bind_connfp = 17263448Sdh155122 &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 17270Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 17280Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 17290Sstevel@tonic-gate connp = connp->conn_next) { 17300Sstevel@tonic-gate if (IPCL_BIND_MATCH_V6(connp, protocol, 17310Sstevel@tonic-gate ip6h->ip6_dst, lport) && 173211042SErik.Nordmark@Sun.COM (connp->conn_zoneid == zoneid || 173311042SErik.Nordmark@Sun.COM connp->conn_allzones || 173411042SErik.Nordmark@Sun.COM ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 173511042SErik.Nordmark@Sun.COM (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) && 173611042SErik.Nordmark@Sun.COM (ira->ira_flags & IRAF_TX_SHARED_ADDR)))) 17370Sstevel@tonic-gate break; 17380Sstevel@tonic-gate } 17390Sstevel@tonic-gate 174011042SErik.Nordmark@Sun.COM if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) && 17411676Sjpk !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION, 174211042SErik.Nordmark@Sun.COM ira, connp)) { 17431676Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__tcp6, 17441676Sjpk char *, "connp(1) could not receive mp(2)", 17451676Sjpk conn_t *, connp, mblk_t *, mp); 17461676Sjpk connp = NULL; 17471676Sjpk } 17481676Sjpk 17490Sstevel@tonic-gate if (connp != NULL) { 17500Sstevel@tonic-gate /* Have a listner at least */ 17510Sstevel@tonic-gate CONN_INC_REF(connp); 17520Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 17530Sstevel@tonic-gate return (connp); 17540Sstevel@tonic-gate } 17550Sstevel@tonic-gate 17560Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 17570Sstevel@tonic-gate break; 
17580Sstevel@tonic-gate 17590Sstevel@tonic-gate case IPPROTO_UDP: 17600Sstevel@tonic-gate up = (uint16_t *)&mp->b_rptr[hdr_len]; 17610Sstevel@tonic-gate lport = up[1]; 17620Sstevel@tonic-gate fport = up[0]; 17633448Sdh155122 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; 17640Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 17650Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 17660Sstevel@tonic-gate connp = connp->conn_next) { 17670Sstevel@tonic-gate if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst, 17680Sstevel@tonic-gate fport, ip6h->ip6_src) && 176911042SErik.Nordmark@Sun.COM (connp->conn_zoneid == zoneid || 177011042SErik.Nordmark@Sun.COM connp->conn_allzones || 177111042SErik.Nordmark@Sun.COM ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 177211042SErik.Nordmark@Sun.COM (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) && 177311042SErik.Nordmark@Sun.COM (ira->ira_flags & IRAF_TX_SHARED_ADDR)))) 17740Sstevel@tonic-gate break; 17750Sstevel@tonic-gate } 17760Sstevel@tonic-gate 177711042SErik.Nordmark@Sun.COM if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) && 17781676Sjpk !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION, 177911042SErik.Nordmark@Sun.COM ira, connp)) { 17801676Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__udp6, 17811676Sjpk char *, "connp(1) could not receive mp(2)", 17821676Sjpk conn_t *, connp, mblk_t *, mp); 17831676Sjpk connp = NULL; 17841676Sjpk } 17851676Sjpk 17860Sstevel@tonic-gate if (connp != NULL) { 17870Sstevel@tonic-gate CONN_INC_REF(connp); 17880Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 17890Sstevel@tonic-gate return (connp); 17900Sstevel@tonic-gate } 17910Sstevel@tonic-gate 17920Sstevel@tonic-gate /* 17930Sstevel@tonic-gate * We shouldn't come here for multicast/broadcast packets 17940Sstevel@tonic-gate */ 17950Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 17960Sstevel@tonic-gate break; 179710616SSebastien.Roy@Sun.COM case IPPROTO_ENCAP: 179810616SSebastien.Roy@Sun.COM 
case IPPROTO_IPV6: 179910616SSebastien.Roy@Sun.COM return (ipcl_iptun_classify_v6(&ip6h->ip6_src, 180010616SSebastien.Roy@Sun.COM &ip6h->ip6_dst, ipst)); 18010Sstevel@tonic-gate } 18020Sstevel@tonic-gate 18030Sstevel@tonic-gate return (NULL); 18040Sstevel@tonic-gate } 18050Sstevel@tonic-gate 18060Sstevel@tonic-gate /* 18070Sstevel@tonic-gate * wrapper around ipcl_classify_(v4,v6) routines. 18080Sstevel@tonic-gate */ 18090Sstevel@tonic-gate conn_t * 181011042SErik.Nordmark@Sun.COM ipcl_classify(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst) 18110Sstevel@tonic-gate { 181211042SErik.Nordmark@Sun.COM if (ira->ira_flags & IRAF_IS_IPV4) { 181311042SErik.Nordmark@Sun.COM return (ipcl_classify_v4(mp, ira->ira_protocol, 181411042SErik.Nordmark@Sun.COM ira->ira_ip_hdr_length, ira, ipst)); 181511042SErik.Nordmark@Sun.COM } else { 181611042SErik.Nordmark@Sun.COM return (ipcl_classify_v6(mp, ira->ira_protocol, 181711042SErik.Nordmark@Sun.COM ira->ira_ip_hdr_length, ira, ipst)); 18180Sstevel@tonic-gate } 18190Sstevel@tonic-gate } 18200Sstevel@tonic-gate 182111042SErik.Nordmark@Sun.COM /* 182211042SErik.Nordmark@Sun.COM * Only used to classify SCTP RAW sockets 182311042SErik.Nordmark@Sun.COM */ 18240Sstevel@tonic-gate conn_t * 182511042SErik.Nordmark@Sun.COM ipcl_classify_raw(mblk_t *mp, uint8_t protocol, uint32_t ports, 182611042SErik.Nordmark@Sun.COM ipha_t *ipha, ip6_t *ip6h, ip_recv_attr_t *ira, ip_stack_t *ipst) 18270Sstevel@tonic-gate { 18281676Sjpk connf_t *connfp; 18290Sstevel@tonic-gate conn_t *connp; 18300Sstevel@tonic-gate in_port_t lport; 183111042SErik.Nordmark@Sun.COM int ipversion; 18321676Sjpk const void *dst; 183311042SErik.Nordmark@Sun.COM zoneid_t zoneid = ira->ira_zoneid; 18340Sstevel@tonic-gate 18350Sstevel@tonic-gate lport = ((uint16_t *)&ports)[1]; 183611042SErik.Nordmark@Sun.COM if (ira->ira_flags & IRAF_IS_IPV4) { 183711042SErik.Nordmark@Sun.COM dst = (const void *)&ipha->ipha_dst; 183811042SErik.Nordmark@Sun.COM ipversion = IPV4_VERSION; 
183911042SErik.Nordmark@Sun.COM } else { 184011042SErik.Nordmark@Sun.COM dst = (const void *)&ip6h->ip6_dst; 184111042SErik.Nordmark@Sun.COM ipversion = IPV6_VERSION; 18421676Sjpk } 18431676Sjpk 18443448Sdh155122 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)]; 18450Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 18460Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 18470Sstevel@tonic-gate connp = connp->conn_next) { 18480Sstevel@tonic-gate /* We don't allow v4 fallback for v6 raw socket. */ 184911042SErik.Nordmark@Sun.COM if (ipversion != connp->conn_ipversion) 18500Sstevel@tonic-gate continue; 185111042SErik.Nordmark@Sun.COM if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) && 185211042SErik.Nordmark@Sun.COM !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) { 185311042SErik.Nordmark@Sun.COM if (ipversion == IPV4_VERSION) { 18541676Sjpk if (!IPCL_CONN_MATCH(connp, protocol, 185511042SErik.Nordmark@Sun.COM ipha->ipha_src, ipha->ipha_dst, ports)) 18561676Sjpk continue; 18570Sstevel@tonic-gate } else { 18581676Sjpk if (!IPCL_CONN_MATCH_V6(connp, protocol, 185911042SErik.Nordmark@Sun.COM ip6h->ip6_src, ip6h->ip6_dst, ports)) 18601676Sjpk continue; 18610Sstevel@tonic-gate } 18620Sstevel@tonic-gate } else { 186311042SErik.Nordmark@Sun.COM if (ipversion == IPV4_VERSION) { 18641676Sjpk if (!IPCL_BIND_MATCH(connp, protocol, 186511042SErik.Nordmark@Sun.COM ipha->ipha_dst, lport)) 18661676Sjpk continue; 18670Sstevel@tonic-gate } else { 18681676Sjpk if (!IPCL_BIND_MATCH_V6(connp, protocol, 186911042SErik.Nordmark@Sun.COM ip6h->ip6_dst, lport)) 18701676Sjpk continue; 18710Sstevel@tonic-gate } 18720Sstevel@tonic-gate } 18731676Sjpk 187411042SErik.Nordmark@Sun.COM if (connp->conn_zoneid == zoneid || 187511042SErik.Nordmark@Sun.COM connp->conn_allzones || 187611042SErik.Nordmark@Sun.COM ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 187711042SErik.Nordmark@Sun.COM (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) && 
187811042SErik.Nordmark@Sun.COM (ira->ira_flags & IRAF_TX_SHARED_ADDR))) 18791676Sjpk break; 18801676Sjpk } 188111042SErik.Nordmark@Sun.COM 188211042SErik.Nordmark@Sun.COM if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) && 188311042SErik.Nordmark@Sun.COM !tsol_receive_local(mp, dst, ipversion, ira, connp)) { 18841676Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__rawip, 18851676Sjpk char *, "connp(1) could not receive mp(2)", 18861676Sjpk conn_t *, connp, mblk_t *, mp); 18871676Sjpk connp = NULL; 18880Sstevel@tonic-gate } 1889409Skcpoon 1890409Skcpoon if (connp != NULL) 1891409Skcpoon goto found; 1892409Skcpoon mutex_exit(&connfp->connf_lock); 1893409Skcpoon 189411042SErik.Nordmark@Sun.COM /* Try to look for a wildcard SCTP RAW socket match. */ 18953448Sdh155122 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)]; 1896409Skcpoon mutex_enter(&connfp->connf_lock); 1897409Skcpoon for (connp = connfp->connf_head; connp != NULL; 1898409Skcpoon connp = connp->conn_next) { 1899409Skcpoon /* We don't allow v4 fallback for v6 raw socket. 
*/ 190011042SErik.Nordmark@Sun.COM if (ipversion != connp->conn_ipversion) 190111042SErik.Nordmark@Sun.COM continue; 190211042SErik.Nordmark@Sun.COM if (!IPCL_ZONE_MATCH(connp, zoneid)) 1903409Skcpoon continue; 190411042SErik.Nordmark@Sun.COM 190511042SErik.Nordmark@Sun.COM if (ipversion == IPV4_VERSION) { 190611042SErik.Nordmark@Sun.COM if (IPCL_RAW_MATCH(connp, protocol, ipha->ipha_dst)) 1907409Skcpoon break; 1908409Skcpoon } else { 190911042SErik.Nordmark@Sun.COM if (IPCL_RAW_MATCH_V6(connp, protocol, ip6h->ip6_dst)) { 1910409Skcpoon break; 1911409Skcpoon } 1912409Skcpoon } 19130Sstevel@tonic-gate } 1914409Skcpoon 1915409Skcpoon if (connp != NULL) 1916409Skcpoon goto found; 1917409Skcpoon 19180Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 19190Sstevel@tonic-gate return (NULL); 1920409Skcpoon 1921409Skcpoon found: 1922409Skcpoon ASSERT(connp != NULL); 1923409Skcpoon CONN_INC_REF(connp); 1924409Skcpoon mutex_exit(&connfp->connf_lock); 1925409Skcpoon return (connp); 19260Sstevel@tonic-gate } 19270Sstevel@tonic-gate 19280Sstevel@tonic-gate /* ARGSUSED */ 19290Sstevel@tonic-gate static int 19305240Snordmark tcp_conn_constructor(void *buf, void *cdrarg, int kmflags) 19310Sstevel@tonic-gate { 19320Sstevel@tonic-gate itc_t *itc = (itc_t *)buf; 19330Sstevel@tonic-gate conn_t *connp = &itc->itc_conn; 19345240Snordmark tcp_t *tcp = (tcp_t *)&itc[1]; 19355240Snordmark 19365240Snordmark bzero(connp, sizeof (conn_t)); 19375240Snordmark bzero(tcp, sizeof (tcp_t)); 19385240Snordmark 19395240Snordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 19405240Snordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 19418348SEric.Yu@Sun.COM cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL); 194211042SErik.Nordmark@Sun.COM tcp->tcp_timercache = tcp_timermp_alloc(kmflags); 194311042SErik.Nordmark@Sun.COM if (tcp->tcp_timercache == NULL) 194411042SErik.Nordmark@Sun.COM return (ENOMEM); 19450Sstevel@tonic-gate connp->conn_tcp = tcp; 19460Sstevel@tonic-gate 
connp->conn_flags = IPCL_TCPCONN; 194711042SErik.Nordmark@Sun.COM connp->conn_proto = IPPROTO_TCP; 19480Sstevel@tonic-gate tcp->tcp_connp = connp; 194911042SErik.Nordmark@Sun.COM rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 195011042SErik.Nordmark@Sun.COM 195111042SErik.Nordmark@Sun.COM connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 195211042SErik.Nordmark@Sun.COM if (connp->conn_ixa == NULL) { 195311042SErik.Nordmark@Sun.COM tcp_timermp_free(tcp); 195411042SErik.Nordmark@Sun.COM return (ENOMEM); 195511042SErik.Nordmark@Sun.COM } 195611042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_refcnt = 1; 195711042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_protocol = connp->conn_proto; 195811042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 19590Sstevel@tonic-gate return (0); 19600Sstevel@tonic-gate } 19610Sstevel@tonic-gate 19620Sstevel@tonic-gate /* ARGSUSED */ 19630Sstevel@tonic-gate static void 19645240Snordmark tcp_conn_destructor(void *buf, void *cdrarg) 19655240Snordmark { 19665240Snordmark itc_t *itc = (itc_t *)buf; 19675240Snordmark conn_t *connp = &itc->itc_conn; 19685240Snordmark tcp_t *tcp = (tcp_t *)&itc[1]; 19695240Snordmark 19705240Snordmark ASSERT(connp->conn_flags & IPCL_TCPCONN); 19715240Snordmark ASSERT(tcp->tcp_connp == connp); 19725240Snordmark ASSERT(connp->conn_tcp == tcp); 19735240Snordmark tcp_timermp_free(tcp); 19745240Snordmark mutex_destroy(&connp->conn_lock); 19755240Snordmark cv_destroy(&connp->conn_cv); 19768348SEric.Yu@Sun.COM cv_destroy(&connp->conn_sq_cv); 197711042SErik.Nordmark@Sun.COM rw_destroy(&connp->conn_ilg_lock); 197811042SErik.Nordmark@Sun.COM 197911042SErik.Nordmark@Sun.COM /* Can be NULL if constructor failed */ 198011042SErik.Nordmark@Sun.COM if (connp->conn_ixa != NULL) { 198111042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_refcnt == 1); 198211042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_ire == NULL); 198311042SErik.Nordmark@Sun.COM 
ASSERT(connp->conn_ixa->ixa_nce == NULL); 198411042SErik.Nordmark@Sun.COM ixa_refrele(connp->conn_ixa); 198511042SErik.Nordmark@Sun.COM } 19865240Snordmark } 19875240Snordmark 19885240Snordmark /* ARGSUSED */ 19895240Snordmark static int 19905240Snordmark ip_conn_constructor(void *buf, void *cdrarg, int kmflags) 19915240Snordmark { 19925240Snordmark itc_t *itc = (itc_t *)buf; 19935240Snordmark conn_t *connp = &itc->itc_conn; 19945240Snordmark 19955240Snordmark bzero(connp, sizeof (conn_t)); 19965240Snordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 19975240Snordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 19985240Snordmark connp->conn_flags = IPCL_IPCCONN; 199911042SErik.Nordmark@Sun.COM rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 20005240Snordmark 200111042SErik.Nordmark@Sun.COM connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 200211042SErik.Nordmark@Sun.COM if (connp->conn_ixa == NULL) 200311042SErik.Nordmark@Sun.COM return (ENOMEM); 200411042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_refcnt = 1; 200511042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 20065240Snordmark return (0); 20075240Snordmark } 20085240Snordmark 20095240Snordmark /* ARGSUSED */ 20105240Snordmark static void 20115240Snordmark ip_conn_destructor(void *buf, void *cdrarg) 20125240Snordmark { 20135240Snordmark itc_t *itc = (itc_t *)buf; 20145240Snordmark conn_t *connp = &itc->itc_conn; 20155240Snordmark 20165240Snordmark ASSERT(connp->conn_flags & IPCL_IPCCONN); 20175240Snordmark ASSERT(connp->conn_priv == NULL); 20185240Snordmark mutex_destroy(&connp->conn_lock); 20195240Snordmark cv_destroy(&connp->conn_cv); 202011042SErik.Nordmark@Sun.COM rw_destroy(&connp->conn_ilg_lock); 202111042SErik.Nordmark@Sun.COM 202211042SErik.Nordmark@Sun.COM /* Can be NULL if constructor failed */ 202311042SErik.Nordmark@Sun.COM if (connp->conn_ixa != NULL) { 202411042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_refcnt 
== 1); 202511042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_ire == NULL); 202611042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_nce == NULL); 202711042SErik.Nordmark@Sun.COM ixa_refrele(connp->conn_ixa); 202811042SErik.Nordmark@Sun.COM } 20295240Snordmark } 20305240Snordmark 20315240Snordmark /* ARGSUSED */ 20325240Snordmark static int 20335240Snordmark udp_conn_constructor(void *buf, void *cdrarg, int kmflags) 20345240Snordmark { 20355240Snordmark itc_t *itc = (itc_t *)buf; 20365240Snordmark conn_t *connp = &itc->itc_conn; 20375240Snordmark udp_t *udp = (udp_t *)&itc[1]; 20385240Snordmark 20395240Snordmark bzero(connp, sizeof (conn_t)); 20405240Snordmark bzero(udp, sizeof (udp_t)); 20415240Snordmark 20425240Snordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 20435240Snordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 20445240Snordmark connp->conn_udp = udp; 20455240Snordmark connp->conn_flags = IPCL_UDPCONN; 204611042SErik.Nordmark@Sun.COM connp->conn_proto = IPPROTO_UDP; 20475240Snordmark udp->udp_connp = connp; 204811042SErik.Nordmark@Sun.COM rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 204911042SErik.Nordmark@Sun.COM connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 205011042SErik.Nordmark@Sun.COM if (connp->conn_ixa == NULL) 205111042SErik.Nordmark@Sun.COM return (ENOMEM); 205211042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_refcnt = 1; 205311042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_protocol = connp->conn_proto; 205411042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 20555240Snordmark return (0); 20565240Snordmark } 20575240Snordmark 20585240Snordmark /* ARGSUSED */ 20595240Snordmark static void 20605240Snordmark udp_conn_destructor(void *buf, void *cdrarg) 20615240Snordmark { 20625240Snordmark itc_t *itc = (itc_t *)buf; 20635240Snordmark conn_t *connp = &itc->itc_conn; 20645240Snordmark udp_t *udp = (udp_t *)&itc[1]; 20655240Snordmark 20665240Snordmark 
ASSERT(connp->conn_flags & IPCL_UDPCONN); 20675240Snordmark ASSERT(udp->udp_connp == connp); 20685240Snordmark ASSERT(connp->conn_udp == udp); 20695240Snordmark mutex_destroy(&connp->conn_lock); 20705240Snordmark cv_destroy(&connp->conn_cv); 207111042SErik.Nordmark@Sun.COM rw_destroy(&connp->conn_ilg_lock); 207211042SErik.Nordmark@Sun.COM 207311042SErik.Nordmark@Sun.COM /* Can be NULL if constructor failed */ 207411042SErik.Nordmark@Sun.COM if (connp->conn_ixa != NULL) { 207511042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_refcnt == 1); 207611042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_ire == NULL); 207711042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_nce == NULL); 207811042SErik.Nordmark@Sun.COM ixa_refrele(connp->conn_ixa); 207911042SErik.Nordmark@Sun.COM } 20805240Snordmark } 20815240Snordmark 20825240Snordmark /* ARGSUSED */ 20835240Snordmark static int 20845240Snordmark rawip_conn_constructor(void *buf, void *cdrarg, int kmflags) 20850Sstevel@tonic-gate { 20865240Snordmark itc_t *itc = (itc_t *)buf; 20875240Snordmark conn_t *connp = &itc->itc_conn; 20885240Snordmark icmp_t *icmp = (icmp_t *)&itc[1]; 20895240Snordmark 20905240Snordmark bzero(connp, sizeof (conn_t)); 20915240Snordmark bzero(icmp, sizeof (icmp_t)); 20925240Snordmark 20935240Snordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 20945240Snordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 20955240Snordmark connp->conn_icmp = icmp; 20965240Snordmark connp->conn_flags = IPCL_RAWIPCONN; 209711042SErik.Nordmark@Sun.COM connp->conn_proto = IPPROTO_ICMP; 20985240Snordmark icmp->icmp_connp = connp; 209911042SErik.Nordmark@Sun.COM rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 210011042SErik.Nordmark@Sun.COM connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 210111042SErik.Nordmark@Sun.COM if (connp->conn_ixa == NULL) 210211042SErik.Nordmark@Sun.COM return (ENOMEM); 210311042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_refcnt = 1; 
210411042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_protocol = connp->conn_proto; 210511042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 21065240Snordmark return (0); 21075240Snordmark } 21085240Snordmark 21095240Snordmark /* ARGSUSED */ 21105240Snordmark static void 21115240Snordmark rawip_conn_destructor(void *buf, void *cdrarg) 21125240Snordmark { 21135240Snordmark itc_t *itc = (itc_t *)buf; 21145240Snordmark conn_t *connp = &itc->itc_conn; 21155240Snordmark icmp_t *icmp = (icmp_t *)&itc[1]; 21165240Snordmark 21175240Snordmark ASSERT(connp->conn_flags & IPCL_RAWIPCONN); 21185240Snordmark ASSERT(icmp->icmp_connp == connp); 21195240Snordmark ASSERT(connp->conn_icmp == icmp); 21205240Snordmark mutex_destroy(&connp->conn_lock); 21215240Snordmark cv_destroy(&connp->conn_cv); 212211042SErik.Nordmark@Sun.COM rw_destroy(&connp->conn_ilg_lock); 212311042SErik.Nordmark@Sun.COM 212411042SErik.Nordmark@Sun.COM /* Can be NULL if constructor failed */ 212511042SErik.Nordmark@Sun.COM if (connp->conn_ixa != NULL) { 212611042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_refcnt == 1); 212711042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_ire == NULL); 212811042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_nce == NULL); 212911042SErik.Nordmark@Sun.COM ixa_refrele(connp->conn_ixa); 213011042SErik.Nordmark@Sun.COM } 21315240Snordmark } 21325240Snordmark 21335240Snordmark /* ARGSUSED */ 21345240Snordmark static int 21355240Snordmark rts_conn_constructor(void *buf, void *cdrarg, int kmflags) 21365240Snordmark { 21375240Snordmark itc_t *itc = (itc_t *)buf; 21385240Snordmark conn_t *connp = &itc->itc_conn; 21395240Snordmark rts_t *rts = (rts_t *)&itc[1]; 21405240Snordmark 21415240Snordmark bzero(connp, sizeof (conn_t)); 21425240Snordmark bzero(rts, sizeof (rts_t)); 21435240Snordmark 21445240Snordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 21455240Snordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 
21465240Snordmark connp->conn_rts = rts; 21475240Snordmark connp->conn_flags = IPCL_RTSCONN; 21485240Snordmark rts->rts_connp = connp; 214911042SErik.Nordmark@Sun.COM rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 215011042SErik.Nordmark@Sun.COM connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 215111042SErik.Nordmark@Sun.COM if (connp->conn_ixa == NULL) 215211042SErik.Nordmark@Sun.COM return (ENOMEM); 215311042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_refcnt = 1; 215411042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 21555240Snordmark return (0); 21565240Snordmark } 21575240Snordmark 21585240Snordmark /* ARGSUSED */ 21595240Snordmark static void 21605240Snordmark rts_conn_destructor(void *buf, void *cdrarg) 21615240Snordmark { 21625240Snordmark itc_t *itc = (itc_t *)buf; 21635240Snordmark conn_t *connp = &itc->itc_conn; 21645240Snordmark rts_t *rts = (rts_t *)&itc[1]; 21655240Snordmark 21665240Snordmark ASSERT(connp->conn_flags & IPCL_RTSCONN); 21675240Snordmark ASSERT(rts->rts_connp == connp); 21685240Snordmark ASSERT(connp->conn_rts == rts); 21695240Snordmark mutex_destroy(&connp->conn_lock); 21705240Snordmark cv_destroy(&connp->conn_cv); 217111042SErik.Nordmark@Sun.COM rw_destroy(&connp->conn_ilg_lock); 21728444SRao.Shoaib@Sun.COM 217311042SErik.Nordmark@Sun.COM /* Can be NULL if constructor failed */ 217411042SErik.Nordmark@Sun.COM if (connp->conn_ixa != NULL) { 217511042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_refcnt == 1); 217611042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_ire == NULL); 217711042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_nce == NULL); 217811042SErik.Nordmark@Sun.COM ixa_refrele(connp->conn_ixa); 21798348SEric.Yu@Sun.COM } 21808348SEric.Yu@Sun.COM } 21818348SEric.Yu@Sun.COM 21825240Snordmark /* 21835240Snordmark * Called as part of ipcl_conn_destroy to assert and clear any pointers 21845240Snordmark * in the conn_t. 
218511042SErik.Nordmark@Sun.COM * 218611042SErik.Nordmark@Sun.COM * Below we list all the pointers in the conn_t as a documentation aid. 218711042SErik.Nordmark@Sun.COM * The ones that we can not ASSERT to be NULL are #ifdef'ed out. 218811042SErik.Nordmark@Sun.COM * If you add any pointers to the conn_t please add an ASSERT here 218911042SErik.Nordmark@Sun.COM * and #ifdef it out if it can't be actually asserted to be NULL. 219011042SErik.Nordmark@Sun.COM * In any case, we bzero most of the conn_t at the end of the function. 21915240Snordmark */ 21925240Snordmark void 21935240Snordmark ipcl_conn_cleanup(conn_t *connp) 21945240Snordmark { 219511042SErik.Nordmark@Sun.COM ip_xmit_attr_t *ixa; 219611042SErik.Nordmark@Sun.COM 21975240Snordmark ASSERT(connp->conn_latch == NULL); 219811042SErik.Nordmark@Sun.COM ASSERT(connp->conn_latch_in_policy == NULL); 219911042SErik.Nordmark@Sun.COM ASSERT(connp->conn_latch_in_action == NULL); 22005240Snordmark #ifdef notdef 22015240Snordmark ASSERT(connp->conn_rq == NULL); 22025240Snordmark ASSERT(connp->conn_wq == NULL); 22035240Snordmark #endif 22045240Snordmark ASSERT(connp->conn_cred == NULL); 22055240Snordmark ASSERT(connp->conn_g_fanout == NULL); 22065240Snordmark ASSERT(connp->conn_g_next == NULL); 22075240Snordmark ASSERT(connp->conn_g_prev == NULL); 22085240Snordmark ASSERT(connp->conn_policy == NULL); 22095240Snordmark ASSERT(connp->conn_fanout == NULL); 22105240Snordmark ASSERT(connp->conn_next == NULL); 22115240Snordmark ASSERT(connp->conn_prev == NULL); 22125240Snordmark ASSERT(connp->conn_oper_pending_ill == NULL); 22135240Snordmark ASSERT(connp->conn_ilg == NULL); 22145240Snordmark ASSERT(connp->conn_drain_next == NULL); 22155240Snordmark ASSERT(connp->conn_drain_prev == NULL); 22165277Snordmark #ifdef notdef 22175277Snordmark /* conn_idl is not cleared when removed from idl list */ 22185240Snordmark ASSERT(connp->conn_idl == NULL); 22195277Snordmark #endif 22205240Snordmark ASSERT(connp->conn_ipsec_opt_mp == NULL); 
222111042SErik.Nordmark@Sun.COM #ifdef notdef 222211042SErik.Nordmark@Sun.COM /* conn_netstack is cleared by the caller; needed by ixa_cleanup */ 22235240Snordmark ASSERT(connp->conn_netstack == NULL); 222411042SErik.Nordmark@Sun.COM #endif 22255240Snordmark 22268348SEric.Yu@Sun.COM ASSERT(connp->conn_helper_info == NULL); 222711042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa != NULL); 222811042SErik.Nordmark@Sun.COM ixa = connp->conn_ixa; 222911042SErik.Nordmark@Sun.COM ASSERT(ixa->ixa_refcnt == 1); 223011042SErik.Nordmark@Sun.COM /* Need to preserve ixa_protocol */ 223111042SErik.Nordmark@Sun.COM ixa_cleanup(ixa); 223211042SErik.Nordmark@Sun.COM ixa->ixa_flags = 0; 223311042SErik.Nordmark@Sun.COM 22345240Snordmark /* Clear out the conn_t fields that are not preserved */ 22355240Snordmark bzero(&connp->conn_start_clr, 22365240Snordmark sizeof (conn_t) - 22375240Snordmark ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp)); 22380Sstevel@tonic-gate } 22390Sstevel@tonic-gate 22400Sstevel@tonic-gate /* 22410Sstevel@tonic-gate * All conns are inserted in a global multi-list for the benefit of 22420Sstevel@tonic-gate * walkers. The walk is guaranteed to walk all open conns at the time 22430Sstevel@tonic-gate * of the start of the walk exactly once. This property is needed to 22440Sstevel@tonic-gate * achieve some cleanups during unplumb of interfaces. This is achieved 22450Sstevel@tonic-gate * as follows. 22460Sstevel@tonic-gate * 22470Sstevel@tonic-gate * ipcl_conn_create and ipcl_conn_destroy are the only functions that 22480Sstevel@tonic-gate * call the insert and delete functions below at creation and deletion 22490Sstevel@tonic-gate * time respectively. The conn never moves or changes its position in this 22500Sstevel@tonic-gate * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt 22510Sstevel@tonic-gate * won't increase due to walkers, once the conn deletion has started. 
Note 22520Sstevel@tonic-gate * that we can't remove the conn from the global list and then wait for 22530Sstevel@tonic-gate * the refcnt to drop to zero, since walkers would then see a truncated 22540Sstevel@tonic-gate * list. CONN_INCIPIENT ensures that walkers don't start looking at 22550Sstevel@tonic-gate * conns until ip_open is ready to make them globally visible. 22560Sstevel@tonic-gate * The global round robin multi-list locks are held only to get the 22570Sstevel@tonic-gate * next member/insertion/deletion and contention should be negligible 22580Sstevel@tonic-gate * if the multi-list is much greater than the number of cpus. 22590Sstevel@tonic-gate */ 22600Sstevel@tonic-gate void 22610Sstevel@tonic-gate ipcl_globalhash_insert(conn_t *connp) 22620Sstevel@tonic-gate { 22630Sstevel@tonic-gate int index; 22643448Sdh155122 struct connf_s *connfp; 22653448Sdh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 22660Sstevel@tonic-gate 22670Sstevel@tonic-gate /* 22680Sstevel@tonic-gate * No need for atomic here. Approximate even distribution 22690Sstevel@tonic-gate * in the global lists is sufficient. 22700Sstevel@tonic-gate */ 22713448Sdh155122 ipst->ips_conn_g_index++; 22723448Sdh155122 index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1); 22730Sstevel@tonic-gate 22740Sstevel@tonic-gate connp->conn_g_prev = NULL; 22750Sstevel@tonic-gate /* 22760Sstevel@tonic-gate * Mark as INCIPIENT, so that walkers will ignore this 22770Sstevel@tonic-gate * for now, till ip_open is ready to make it visible globally. 
22780Sstevel@tonic-gate */ 22790Sstevel@tonic-gate connp->conn_state_flags |= CONN_INCIPIENT; 22800Sstevel@tonic-gate 22813448Sdh155122 connfp = &ipst->ips_ipcl_globalhash_fanout[index]; 22820Sstevel@tonic-gate /* Insert at the head of the list */ 22833448Sdh155122 mutex_enter(&connfp->connf_lock); 22843448Sdh155122 connp->conn_g_next = connfp->connf_head; 22850Sstevel@tonic-gate if (connp->conn_g_next != NULL) 22860Sstevel@tonic-gate connp->conn_g_next->conn_g_prev = connp; 22873448Sdh155122 connfp->connf_head = connp; 22880Sstevel@tonic-gate 22890Sstevel@tonic-gate /* The fanout bucket this conn points to */ 22903448Sdh155122 connp->conn_g_fanout = connfp; 22910Sstevel@tonic-gate 22923448Sdh155122 mutex_exit(&connfp->connf_lock); 22930Sstevel@tonic-gate } 22940Sstevel@tonic-gate 22950Sstevel@tonic-gate void 22960Sstevel@tonic-gate ipcl_globalhash_remove(conn_t *connp) 22970Sstevel@tonic-gate { 22983448Sdh155122 struct connf_s *connfp; 22993448Sdh155122 23000Sstevel@tonic-gate /* 23010Sstevel@tonic-gate * We were never inserted in the global multi list. 23020Sstevel@tonic-gate * IPCL_NONE variety is never inserted in the global multilist 23030Sstevel@tonic-gate * since it is presumed to not need any cleanup and is transient. 
23040Sstevel@tonic-gate */ 23050Sstevel@tonic-gate if (connp->conn_g_fanout == NULL) 23060Sstevel@tonic-gate return; 23070Sstevel@tonic-gate 23083448Sdh155122 connfp = connp->conn_g_fanout; 23093448Sdh155122 mutex_enter(&connfp->connf_lock); 23100Sstevel@tonic-gate if (connp->conn_g_prev != NULL) 23110Sstevel@tonic-gate connp->conn_g_prev->conn_g_next = connp->conn_g_next; 23120Sstevel@tonic-gate else 23133448Sdh155122 connfp->connf_head = connp->conn_g_next; 23140Sstevel@tonic-gate if (connp->conn_g_next != NULL) 23150Sstevel@tonic-gate connp->conn_g_next->conn_g_prev = connp->conn_g_prev; 23163448Sdh155122 mutex_exit(&connfp->connf_lock); 23170Sstevel@tonic-gate 23180Sstevel@tonic-gate /* Better to stumble on a null pointer than to corrupt memory */ 23190Sstevel@tonic-gate connp->conn_g_next = NULL; 23200Sstevel@tonic-gate connp->conn_g_prev = NULL; 23215240Snordmark connp->conn_g_fanout = NULL; 23220Sstevel@tonic-gate } 23230Sstevel@tonic-gate 23240Sstevel@tonic-gate /* 23250Sstevel@tonic-gate * Walk the list of all conn_t's in the system, calling the function provided 232611042SErik.Nordmark@Sun.COM * With the specified argument for each. 23270Sstevel@tonic-gate * Applies to both IPv4 and IPv6. 23280Sstevel@tonic-gate * 232911042SErik.Nordmark@Sun.COM * CONNs may hold pointers to ills (conn_dhcpinit_ill and 233011042SErik.Nordmark@Sun.COM * conn_oper_pending_ill). To guard against stale pointers 23310Sstevel@tonic-gate * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is 23320Sstevel@tonic-gate * unplumbed or removed. New conn_t's that are created while we are walking 23330Sstevel@tonic-gate * may be missed by this walk, because they are not necessarily inserted 23340Sstevel@tonic-gate * at the tail of the list. They are new conn_t's and thus don't have any 23350Sstevel@tonic-gate * stale pointers. The CONN_CLOSING flag ensures that no new reference 23360Sstevel@tonic-gate * is created to the struct that is going away. 
23370Sstevel@tonic-gate */ 23380Sstevel@tonic-gate void 23393448Sdh155122 ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst) 23400Sstevel@tonic-gate { 23410Sstevel@tonic-gate int i; 23420Sstevel@tonic-gate conn_t *connp; 23430Sstevel@tonic-gate conn_t *prev_connp; 23440Sstevel@tonic-gate 23450Sstevel@tonic-gate for (i = 0; i < CONN_G_HASH_SIZE; i++) { 23463448Sdh155122 mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 23470Sstevel@tonic-gate prev_connp = NULL; 23483448Sdh155122 connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head; 23490Sstevel@tonic-gate while (connp != NULL) { 23500Sstevel@tonic-gate mutex_enter(&connp->conn_lock); 23510Sstevel@tonic-gate if (connp->conn_state_flags & 23520Sstevel@tonic-gate (CONN_CONDEMNED | CONN_INCIPIENT)) { 23530Sstevel@tonic-gate mutex_exit(&connp->conn_lock); 23540Sstevel@tonic-gate connp = connp->conn_g_next; 23550Sstevel@tonic-gate continue; 23560Sstevel@tonic-gate } 23570Sstevel@tonic-gate CONN_INC_REF_LOCKED(connp); 23580Sstevel@tonic-gate mutex_exit(&connp->conn_lock); 23593448Sdh155122 mutex_exit( 23603448Sdh155122 &ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 23610Sstevel@tonic-gate (*func)(connp, arg); 23620Sstevel@tonic-gate if (prev_connp != NULL) 23630Sstevel@tonic-gate CONN_DEC_REF(prev_connp); 23643448Sdh155122 mutex_enter( 23653448Sdh155122 &ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 23660Sstevel@tonic-gate prev_connp = connp; 23670Sstevel@tonic-gate connp = connp->conn_g_next; 23680Sstevel@tonic-gate } 23693448Sdh155122 mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 23700Sstevel@tonic-gate if (prev_connp != NULL) 23710Sstevel@tonic-gate CONN_DEC_REF(prev_connp); 23720Sstevel@tonic-gate } 23730Sstevel@tonic-gate } 23740Sstevel@tonic-gate 23750Sstevel@tonic-gate /* 23760Sstevel@tonic-gate * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on 23770Sstevel@tonic-gate * the {src, dst, lport, fport} quadruplet. 
Returns with conn reference 23780Sstevel@tonic-gate * held; caller must call CONN_DEC_REF. Only checks for connected entries 23792323Sethindra * (peer tcp in ESTABLISHED state). 23800Sstevel@tonic-gate */ 23810Sstevel@tonic-gate conn_t * 238211042SErik.Nordmark@Sun.COM ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcpha_t *tcpha, 23833448Sdh155122 ip_stack_t *ipst) 23840Sstevel@tonic-gate { 23850Sstevel@tonic-gate uint32_t ports; 23860Sstevel@tonic-gate uint16_t *pports = (uint16_t *)&ports; 23870Sstevel@tonic-gate connf_t *connfp; 23880Sstevel@tonic-gate conn_t *tconnp; 23890Sstevel@tonic-gate boolean_t zone_chk; 23900Sstevel@tonic-gate 23910Sstevel@tonic-gate /* 23920Sstevel@tonic-gate * If either the source of destination address is loopback, then 23930Sstevel@tonic-gate * both endpoints must be in the same Zone. Otherwise, both of 23940Sstevel@tonic-gate * the addresses are system-wide unique (tcp is in ESTABLISHED 23950Sstevel@tonic-gate * state) and the endpoints may reside in different Zones. 
23960Sstevel@tonic-gate */ 23970Sstevel@tonic-gate zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) || 23980Sstevel@tonic-gate ipha->ipha_dst == htonl(INADDR_LOOPBACK)); 23990Sstevel@tonic-gate 240011042SErik.Nordmark@Sun.COM pports[0] = tcpha->tha_fport; 240111042SErik.Nordmark@Sun.COM pports[1] = tcpha->tha_lport; 24020Sstevel@tonic-gate 24033448Sdh155122 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, 24043448Sdh155122 ports, ipst)]; 24050Sstevel@tonic-gate 24060Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 24070Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 24080Sstevel@tonic-gate tconnp = tconnp->conn_next) { 24090Sstevel@tonic-gate 24100Sstevel@tonic-gate if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 24110Sstevel@tonic-gate ipha->ipha_dst, ipha->ipha_src, ports) && 24122323Sethindra tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED && 24130Sstevel@tonic-gate (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 24140Sstevel@tonic-gate 24150Sstevel@tonic-gate ASSERT(tconnp != connp); 24160Sstevel@tonic-gate CONN_INC_REF(tconnp); 24170Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 24180Sstevel@tonic-gate return (tconnp); 24190Sstevel@tonic-gate } 24200Sstevel@tonic-gate } 24210Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 24220Sstevel@tonic-gate return (NULL); 24230Sstevel@tonic-gate } 24240Sstevel@tonic-gate 24250Sstevel@tonic-gate /* 24260Sstevel@tonic-gate * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on 24270Sstevel@tonic-gate * the {src, dst, lport, fport} quadruplet. Returns with conn reference 24280Sstevel@tonic-gate * held; caller must call CONN_DEC_REF. Only checks for connected entries 24292323Sethindra * (peer tcp in ESTABLISHED state). 
24300Sstevel@tonic-gate */ 24310Sstevel@tonic-gate conn_t * 243211042SErik.Nordmark@Sun.COM ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcpha_t *tcpha, 24333448Sdh155122 ip_stack_t *ipst) 24340Sstevel@tonic-gate { 24350Sstevel@tonic-gate uint32_t ports; 24360Sstevel@tonic-gate uint16_t *pports = (uint16_t *)&ports; 24370Sstevel@tonic-gate connf_t *connfp; 24380Sstevel@tonic-gate conn_t *tconnp; 24390Sstevel@tonic-gate boolean_t zone_chk; 24400Sstevel@tonic-gate 24410Sstevel@tonic-gate /* 24420Sstevel@tonic-gate * If either the source of destination address is loopback, then 24430Sstevel@tonic-gate * both endpoints must be in the same Zone. Otherwise, both of 24440Sstevel@tonic-gate * the addresses are system-wide unique (tcp is in ESTABLISHED 24450Sstevel@tonic-gate * state) and the endpoints may reside in different Zones. We 24460Sstevel@tonic-gate * don't do Zone check for link local address(es) because the 24470Sstevel@tonic-gate * current Zone implementation treats each link local address as 24480Sstevel@tonic-gate * being unique per system node, i.e. they belong to global Zone. 
24490Sstevel@tonic-gate */ 24500Sstevel@tonic-gate zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) || 24510Sstevel@tonic-gate IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)); 24520Sstevel@tonic-gate 245311042SErik.Nordmark@Sun.COM pports[0] = tcpha->tha_fport; 245411042SErik.Nordmark@Sun.COM pports[1] = tcpha->tha_lport; 24550Sstevel@tonic-gate 24563448Sdh155122 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, 24573448Sdh155122 ports, ipst)]; 24580Sstevel@tonic-gate 24590Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 24600Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 24610Sstevel@tonic-gate tconnp = tconnp->conn_next) { 24620Sstevel@tonic-gate 246311042SErik.Nordmark@Sun.COM /* We skip conn_bound_if check here as this is loopback tcp */ 24640Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 24650Sstevel@tonic-gate ip6h->ip6_dst, ip6h->ip6_src, ports) && 24662323Sethindra tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED && 24670Sstevel@tonic-gate (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 24680Sstevel@tonic-gate 24690Sstevel@tonic-gate ASSERT(tconnp != connp); 24700Sstevel@tonic-gate CONN_INC_REF(tconnp); 24710Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 24720Sstevel@tonic-gate return (tconnp); 24730Sstevel@tonic-gate } 24740Sstevel@tonic-gate } 24750Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 24760Sstevel@tonic-gate return (NULL); 24770Sstevel@tonic-gate } 24780Sstevel@tonic-gate 24790Sstevel@tonic-gate /* 24800Sstevel@tonic-gate * Find an exact {src, dst, lport, fport} match for a bounced datagram. 24810Sstevel@tonic-gate * Returns with conn reference held. Caller must call CONN_DEC_REF. 24820Sstevel@tonic-gate * Only checks for connected entries i.e. no INADDR_ANY checks. 
24830Sstevel@tonic-gate */ 24840Sstevel@tonic-gate conn_t * 248511042SErik.Nordmark@Sun.COM ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcpha_t *tcpha, int min_state, 24863448Sdh155122 ip_stack_t *ipst) 24870Sstevel@tonic-gate { 24880Sstevel@tonic-gate uint32_t ports; 24890Sstevel@tonic-gate uint16_t *pports; 24900Sstevel@tonic-gate connf_t *connfp; 24910Sstevel@tonic-gate conn_t *tconnp; 24920Sstevel@tonic-gate 24930Sstevel@tonic-gate pports = (uint16_t *)&ports; 249411042SErik.Nordmark@Sun.COM pports[0] = tcpha->tha_fport; 249511042SErik.Nordmark@Sun.COM pports[1] = tcpha->tha_lport; 24960Sstevel@tonic-gate 24973448Sdh155122 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, 24984691Skcpoon ports, ipst)]; 24990Sstevel@tonic-gate 25000Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 25010Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 25020Sstevel@tonic-gate tconnp = tconnp->conn_next) { 25030Sstevel@tonic-gate 25040Sstevel@tonic-gate if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 25050Sstevel@tonic-gate ipha->ipha_dst, ipha->ipha_src, ports) && 25060Sstevel@tonic-gate tconnp->conn_tcp->tcp_state >= min_state) { 25070Sstevel@tonic-gate 25080Sstevel@tonic-gate CONN_INC_REF(tconnp); 25090Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 25100Sstevel@tonic-gate return (tconnp); 25110Sstevel@tonic-gate } 25120Sstevel@tonic-gate } 25130Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 25140Sstevel@tonic-gate return (NULL); 25150Sstevel@tonic-gate } 25160Sstevel@tonic-gate 25170Sstevel@tonic-gate /* 25180Sstevel@tonic-gate * Find an exact {src, dst, lport, fport} match for a bounced datagram. 25190Sstevel@tonic-gate * Returns with conn reference held. Caller must call CONN_DEC_REF. 25200Sstevel@tonic-gate * Only checks for connected entries i.e. no INADDR_ANY checks. 25210Sstevel@tonic-gate * Match on ifindex in addition to addresses. 
25220Sstevel@tonic-gate */ 25230Sstevel@tonic-gate conn_t * 25240Sstevel@tonic-gate ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state, 25253448Sdh155122 uint_t ifindex, ip_stack_t *ipst) 25260Sstevel@tonic-gate { 25270Sstevel@tonic-gate tcp_t *tcp; 25280Sstevel@tonic-gate uint32_t ports; 25290Sstevel@tonic-gate uint16_t *pports; 25300Sstevel@tonic-gate connf_t *connfp; 25310Sstevel@tonic-gate conn_t *tconnp; 25320Sstevel@tonic-gate 25330Sstevel@tonic-gate pports = (uint16_t *)&ports; 25340Sstevel@tonic-gate pports[0] = tcpha->tha_fport; 25350Sstevel@tonic-gate pports[1] = tcpha->tha_lport; 25360Sstevel@tonic-gate 25373448Sdh155122 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, 25384691Skcpoon ports, ipst)]; 25390Sstevel@tonic-gate 25400Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 25410Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 25420Sstevel@tonic-gate tconnp = tconnp->conn_next) { 25430Sstevel@tonic-gate 25440Sstevel@tonic-gate tcp = tconnp->conn_tcp; 25450Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 25460Sstevel@tonic-gate ip6h->ip6_dst, ip6h->ip6_src, ports) && 25470Sstevel@tonic-gate tcp->tcp_state >= min_state && 254811042SErik.Nordmark@Sun.COM (tconnp->conn_bound_if == 0 || 254911042SErik.Nordmark@Sun.COM tconnp->conn_bound_if == ifindex)) { 25500Sstevel@tonic-gate 25510Sstevel@tonic-gate CONN_INC_REF(tconnp); 25520Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 25530Sstevel@tonic-gate return (tconnp); 25540Sstevel@tonic-gate } 25550Sstevel@tonic-gate } 25560Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 25570Sstevel@tonic-gate return (NULL); 25580Sstevel@tonic-gate } 25590Sstevel@tonic-gate 25600Sstevel@tonic-gate /* 25611676Sjpk * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate 25621676Sjpk * a listener when changing state. 
25630Sstevel@tonic-gate */ 25640Sstevel@tonic-gate conn_t * 25653448Sdh155122 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid, 25663448Sdh155122 ip_stack_t *ipst) 25670Sstevel@tonic-gate { 25680Sstevel@tonic-gate connf_t *bind_connfp; 25690Sstevel@tonic-gate conn_t *connp; 25700Sstevel@tonic-gate tcp_t *tcp; 25710Sstevel@tonic-gate 25720Sstevel@tonic-gate /* 25730Sstevel@tonic-gate * Avoid false matches for packets sent to an IP destination of 25740Sstevel@tonic-gate * all zeros. 25750Sstevel@tonic-gate */ 25760Sstevel@tonic-gate if (laddr == 0) 25770Sstevel@tonic-gate return (NULL); 25780Sstevel@tonic-gate 25791676Sjpk ASSERT(zoneid != ALL_ZONES); 25801676Sjpk 25813448Sdh155122 bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 25820Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 25830Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 25840Sstevel@tonic-gate connp = connp->conn_next) { 25850Sstevel@tonic-gate tcp = connp->conn_tcp; 25860Sstevel@tonic-gate if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) && 25872263Ssommerfe IPCL_ZONE_MATCH(connp, zoneid) && 25880Sstevel@tonic-gate (tcp->tcp_listener == NULL)) { 25890Sstevel@tonic-gate CONN_INC_REF(connp); 25900Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 25910Sstevel@tonic-gate return (connp); 25920Sstevel@tonic-gate } 25930Sstevel@tonic-gate } 25940Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 25950Sstevel@tonic-gate return (NULL); 25960Sstevel@tonic-gate } 25970Sstevel@tonic-gate 25981676Sjpk /* 25991676Sjpk * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate 26001676Sjpk * a listener when changing state. 
26011676Sjpk */ 26020Sstevel@tonic-gate conn_t * 26030Sstevel@tonic-gate ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex, 26043448Sdh155122 zoneid_t zoneid, ip_stack_t *ipst) 26050Sstevel@tonic-gate { 26060Sstevel@tonic-gate connf_t *bind_connfp; 26070Sstevel@tonic-gate conn_t *connp = NULL; 26080Sstevel@tonic-gate tcp_t *tcp; 26090Sstevel@tonic-gate 26100Sstevel@tonic-gate /* 26110Sstevel@tonic-gate * Avoid false matches for packets sent to an IP destination of 26120Sstevel@tonic-gate * all zeros. 26130Sstevel@tonic-gate */ 26140Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED(laddr)) 26150Sstevel@tonic-gate return (NULL); 26160Sstevel@tonic-gate 26171676Sjpk ASSERT(zoneid != ALL_ZONES); 26180Sstevel@tonic-gate 26193448Sdh155122 bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 26200Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 26210Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 26220Sstevel@tonic-gate connp = connp->conn_next) { 26230Sstevel@tonic-gate tcp = connp->conn_tcp; 26240Sstevel@tonic-gate if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) && 26252263Ssommerfe IPCL_ZONE_MATCH(connp, zoneid) && 262611042SErik.Nordmark@Sun.COM (connp->conn_bound_if == 0 || 262711042SErik.Nordmark@Sun.COM connp->conn_bound_if == ifindex) && 26280Sstevel@tonic-gate tcp->tcp_listener == NULL) { 26290Sstevel@tonic-gate CONN_INC_REF(connp); 26300Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 26310Sstevel@tonic-gate return (connp); 26320Sstevel@tonic-gate } 26330Sstevel@tonic-gate } 26340Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 26350Sstevel@tonic-gate return (NULL); 26360Sstevel@tonic-gate } 26370Sstevel@tonic-gate 2638741Smasputra /* 2639741Smasputra * ipcl_get_next_conn 2640741Smasputra * get the next entry in the conn global list 2641741Smasputra * and put a reference on the next_conn. 2642741Smasputra * decrement the reference on the current conn. 
2643741Smasputra * 2644741Smasputra * This is an iterator based walker function that also provides for 2645741Smasputra * some selection by the caller. It walks through the conn_hash bucket 2646741Smasputra * searching for the next valid connp in the list, and selects connections 2647741Smasputra * that are neither closed nor condemned. It also REFHOLDS the conn 2648741Smasputra * thus ensuring that the conn exists when the caller uses the conn. 2649741Smasputra */ 2650741Smasputra conn_t * 2651741Smasputra ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags) 2652741Smasputra { 2653741Smasputra conn_t *next_connp; 2654741Smasputra 2655741Smasputra if (connfp == NULL) 2656741Smasputra return (NULL); 2657741Smasputra 2658741Smasputra mutex_enter(&connfp->connf_lock); 2659741Smasputra 2660741Smasputra next_connp = (connp == NULL) ? 2661741Smasputra connfp->connf_head : connp->conn_g_next; 2662741Smasputra 2663741Smasputra while (next_connp != NULL) { 2664741Smasputra mutex_enter(&next_connp->conn_lock); 2665741Smasputra if (!(next_connp->conn_flags & conn_flags) || 2666741Smasputra (next_connp->conn_state_flags & 2667741Smasputra (CONN_CONDEMNED | CONN_INCIPIENT))) { 2668741Smasputra /* 2669741Smasputra * This conn has been condemned or 2670741Smasputra * is closing, or the flags don't match 2671741Smasputra */ 2672741Smasputra mutex_exit(&next_connp->conn_lock); 2673741Smasputra next_connp = next_connp->conn_g_next; 2674741Smasputra continue; 2675741Smasputra } 2676741Smasputra CONN_INC_REF_LOCKED(next_connp); 2677741Smasputra mutex_exit(&next_connp->conn_lock); 2678741Smasputra break; 2679741Smasputra } 2680741Smasputra 2681741Smasputra mutex_exit(&connfp->connf_lock); 2682741Smasputra 2683741Smasputra if (connp != NULL) 2684741Smasputra CONN_DEC_REF(connp); 2685741Smasputra 2686741Smasputra return (next_connp); 2687741Smasputra } 2688741Smasputra 26890Sstevel@tonic-gate #ifdef CONN_DEBUG 26900Sstevel@tonic-gate /* 26910Sstevel@tonic-gate * Trace 
of the last NBUF refhold/refrele 26920Sstevel@tonic-gate */ 26930Sstevel@tonic-gate int 26940Sstevel@tonic-gate conn_trace_ref(conn_t *connp) 26950Sstevel@tonic-gate { 26960Sstevel@tonic-gate int last; 26970Sstevel@tonic-gate conn_trace_t *ctb; 26980Sstevel@tonic-gate 26990Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock)); 27000Sstevel@tonic-gate last = connp->conn_trace_last; 27010Sstevel@tonic-gate last++; 27020Sstevel@tonic-gate if (last == CONN_TRACE_MAX) 27030Sstevel@tonic-gate last = 0; 27040Sstevel@tonic-gate 27050Sstevel@tonic-gate ctb = &connp->conn_trace_buf[last]; 27065023Scarlsonj ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH); 27070Sstevel@tonic-gate connp->conn_trace_last = last; 27080Sstevel@tonic-gate return (1); 27090Sstevel@tonic-gate } 27100Sstevel@tonic-gate 27110Sstevel@tonic-gate int 27120Sstevel@tonic-gate conn_untrace_ref(conn_t *connp) 27130Sstevel@tonic-gate { 27140Sstevel@tonic-gate int last; 27150Sstevel@tonic-gate conn_trace_t *ctb; 27160Sstevel@tonic-gate 27170Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock)); 27180Sstevel@tonic-gate last = connp->conn_trace_last; 27190Sstevel@tonic-gate last++; 27200Sstevel@tonic-gate if (last == CONN_TRACE_MAX) 27210Sstevel@tonic-gate last = 0; 27220Sstevel@tonic-gate 27230Sstevel@tonic-gate ctb = &connp->conn_trace_buf[last]; 27245023Scarlsonj ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH); 27250Sstevel@tonic-gate connp->conn_trace_last = last; 27260Sstevel@tonic-gate return (1); 27270Sstevel@tonic-gate } 27280Sstevel@tonic-gate #endif 2729