10Sstevel@tonic-gate /*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
51503Sericheng * Common Development and Distribution License (the "License").
61503Sericheng * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate *
80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate * See the License for the specific language governing permissions
110Sstevel@tonic-gate * and limitations under the License.
120Sstevel@tonic-gate *
130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate *
190Sstevel@tonic-gate * CDDL HEADER END
200Sstevel@tonic-gate */
210Sstevel@tonic-gate /*
2212056SKacheong.Poon@Sun.COM * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
230Sstevel@tonic-gate */
240Sstevel@tonic-gate
250Sstevel@tonic-gate /*
260Sstevel@tonic-gate * IP PACKET CLASSIFIER
270Sstevel@tonic-gate *
280Sstevel@tonic-gate * The IP packet classifier provides mapping between IP packets and persistent
290Sstevel@tonic-gate * connection state for connection-oriented protocols. It also provides
300Sstevel@tonic-gate * interface for managing connection states.
310Sstevel@tonic-gate *
320Sstevel@tonic-gate * The connection state is kept in conn_t data structure and contains, among
330Sstevel@tonic-gate * other things:
340Sstevel@tonic-gate *
350Sstevel@tonic-gate * o local/remote address and ports
360Sstevel@tonic-gate * o Transport protocol
370Sstevel@tonic-gate * o squeue for the connection (for TCP only)
380Sstevel@tonic-gate * o reference counter
390Sstevel@tonic-gate * o Connection state
400Sstevel@tonic-gate * o hash table linkage
410Sstevel@tonic-gate * o interface/ire information
420Sstevel@tonic-gate * o credentials
430Sstevel@tonic-gate * o ipsec policy
440Sstevel@tonic-gate * o send and receive functions.
450Sstevel@tonic-gate * o mutex lock.
460Sstevel@tonic-gate *
470Sstevel@tonic-gate * Connections use a reference counting scheme. They are freed when the
480Sstevel@tonic-gate * reference counter drops to zero. A reference is incremented when connection
490Sstevel@tonic-gate * is placed in a list or table, when incoming packet for the connection arrives
500Sstevel@tonic-gate * and when connection is processed via squeue (squeue processing may be
510Sstevel@tonic-gate * asynchronous and the reference protects the connection from being destroyed
520Sstevel@tonic-gate * before its processing is finished).
530Sstevel@tonic-gate *
5411042SErik.Nordmark@Sun.COM * conn_recv is used to pass up packets to the ULP.
5511042SErik.Nordmark@Sun.COM * For TCP conn_recv changes. It is tcp_input_listener_unbound initially for
5611042SErik.Nordmark@Sun.COM * a listener, and changes to tcp_input_listener as the listener has picked a
5711042SErik.Nordmark@Sun.COM * good squeue. For other cases it is set to tcp_input_data.
5811042SErik.Nordmark@Sun.COM *
5911042SErik.Nordmark@Sun.COM * conn_recvicmp is used to pass up ICMP errors to the ULP.
600Sstevel@tonic-gate *
610Sstevel@tonic-gate * Classifier uses several hash tables:
620Sstevel@tonic-gate *
630Sstevel@tonic-gate * ipcl_conn_fanout: contains all TCP connections in CONNECTED state
640Sstevel@tonic-gate * ipcl_bind_fanout: contains all connections in BOUND state
650Sstevel@tonic-gate * ipcl_proto_fanout: IPv4 protocol fanout
660Sstevel@tonic-gate * ipcl_proto_fanout_v6: IPv6 protocol fanout
670Sstevel@tonic-gate * ipcl_udp_fanout: contains all UDP connections
6810616SSebastien.Roy@Sun.COM * ipcl_iptun_fanout: contains all IP tunnel connections
690Sstevel@tonic-gate * ipcl_globalhash_fanout: contains all connections
700Sstevel@tonic-gate *
710Sstevel@tonic-gate * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
720Sstevel@tonic-gate * which need to view all existing connections.
730Sstevel@tonic-gate *
740Sstevel@tonic-gate * All tables are protected by per-bucket locks. When both per-bucket lock and
750Sstevel@tonic-gate * connection lock need to be held, the per-bucket lock should be acquired
760Sstevel@tonic-gate * first, followed by the connection lock.
770Sstevel@tonic-gate *
780Sstevel@tonic-gate * All functions doing search in one of these tables increment a reference
790Sstevel@tonic-gate * counter on the connection found (if any). This reference should be dropped
800Sstevel@tonic-gate * when the caller has finished processing the connection.
810Sstevel@tonic-gate *
820Sstevel@tonic-gate *
830Sstevel@tonic-gate * INTERFACES:
840Sstevel@tonic-gate * ===========
850Sstevel@tonic-gate *
860Sstevel@tonic-gate * Connection Lookup:
870Sstevel@tonic-gate * ------------------
880Sstevel@tonic-gate *
8911042SErik.Nordmark@Sun.COM * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, ira, ip_stack)
9011042SErik.Nordmark@Sun.COM * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, ira, ip_stack)
910Sstevel@tonic-gate *
920Sstevel@tonic-gate * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
930Sstevel@tonic-gate * it can't find any associated connection. If the connection is found, its
940Sstevel@tonic-gate * reference counter is incremented.
950Sstevel@tonic-gate *
960Sstevel@tonic-gate * mp: mblock, containing packet header. The full header should fit
970Sstevel@tonic-gate * into a single mblock. It should also contain at least full IP
980Sstevel@tonic-gate * and TCP or UDP header.
990Sstevel@tonic-gate *
1000Sstevel@tonic-gate * protocol: Either IPPROTO_TCP or IPPROTO_UDP.
1010Sstevel@tonic-gate *
1020Sstevel@tonic-gate * hdr_len: The size of IP header. It is used to find TCP or UDP header in
1030Sstevel@tonic-gate * the packet.
1040Sstevel@tonic-gate *
10511042SErik.Nordmark@Sun.COM * ira->ira_zoneid: The zone in which the returned connection must be; the
10611042SErik.Nordmark@Sun.COM * zoneid corresponding to the ire_zoneid on the IRE located for
10711042SErik.Nordmark@Sun.COM * the packet's destination address.
10811042SErik.Nordmark@Sun.COM *
10911042SErik.Nordmark@Sun.COM * ira->ira_flags: Contains the IRAF_TX_MAC_EXEMPTABLE and
11011042SErik.Nordmark@Sun.COM * IRAF_TX_SHARED_ADDR flags
1110Sstevel@tonic-gate *
1120Sstevel@tonic-gate * For TCP connections, the lookup order is as follows:
1130Sstevel@tonic-gate * 5-tuple {src, dst, protocol, local port, remote port}
1140Sstevel@tonic-gate * lookup in ipcl_conn_fanout table.
1150Sstevel@tonic-gate * 3-tuple {dst, remote port, protocol} lookup in
1160Sstevel@tonic-gate * ipcl_bind_fanout table.
1170Sstevel@tonic-gate *
 *		For UDP connections, a 5-tuple {src, dst, protocol, local port,
 *		remote port} lookup is done on ipcl_udp_fanout. Note that
 *		these interfaces do not handle cases where a packet belongs
 *		to multiple UDP clients, which is handled in IP itself.
1220Sstevel@tonic-gate *
1231676Sjpk * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
1241676Sjpk * determine which actual zone gets the segment. This is used only in a
1251676Sjpk * labeled environment. The matching rules are:
1261676Sjpk *
1271676Sjpk * - If it's not a multilevel port, then the label on the packet selects
1281676Sjpk * the zone. Unlabeled packets are delivered to the global zone.
1291676Sjpk *
1301676Sjpk * - If it's a multilevel port, then only the zone registered to receive
1311676Sjpk * packets on that port matches.
1321676Sjpk *
1331676Sjpk * Also, in a labeled environment, packet labels need to be checked. For fully
1341676Sjpk * bound TCP connections, we can assume that the packet label was checked
1351676Sjpk * during connection establishment, and doesn't need to be checked on each
1361676Sjpk * packet. For others, though, we need to check for strict equality or, for
1371676Sjpk * multilevel ports, membership in the range or set. This part currently does
1381676Sjpk * a tnrh lookup on each packet, but could be optimized to use cached results
1391676Sjpk * if that were necessary. (SCTP doesn't come through here, but if it did,
1401676Sjpk * we would apply the same rules as TCP.)
1411676Sjpk *
1421676Sjpk * An implication of the above is that fully-bound TCP sockets must always use
1431676Sjpk * distinct 4-tuples; they can't be discriminated by label alone.
1441676Sjpk *
1451676Sjpk * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
1461676Sjpk * as there's no connection set-up handshake and no shared state.
1471676Sjpk *
1481676Sjpk * Labels on looped-back packets within a single zone do not need to be
1491676Sjpk * checked, as all processes in the same zone have the same label.
1501676Sjpk *
1511676Sjpk * Finally, for unlabeled packets received by a labeled system, special rules
1521676Sjpk * apply. We consider only the MLP if there is one. Otherwise, we prefer a
1531676Sjpk * socket in the zone whose label matches the default label of the sender, if
1541676Sjpk * any. In any event, the receiving socket must have SO_MAC_EXEMPT set and the
1551676Sjpk * receiver's label must dominate the sender's default label.
1561676Sjpk *
15711042SErik.Nordmark@Sun.COM * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcpha_t *, int, ip_stack);
1583448Sdh155122 * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
1593448Sdh155122 * ip_stack);
1600Sstevel@tonic-gate *
 *		Lookup routine to find an exact match for {src, dst, local port,
 *		remote port} for TCP connections in ipcl_conn_fanout. The
 *		addresses and ports are read from the IP and TCP headers
 *		respectively.
1640Sstevel@tonic-gate *
1653448Sdh155122 * conn_t *ipcl_lookup_listener_v4(lport, laddr, protocol,
1663448Sdh155122 * zoneid, ip_stack);
1673448Sdh155122 * conn_t *ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
1683448Sdh155122 * zoneid, ip_stack);
1690Sstevel@tonic-gate *
1700Sstevel@tonic-gate * Lookup routine to find a listener with the tuple {lport, laddr,
1710Sstevel@tonic-gate * protocol} in the ipcl_bind_fanout table. For IPv6, an additional
1720Sstevel@tonic-gate * parameter interface index is also compared.
1730Sstevel@tonic-gate *
1743448Sdh155122 * void ipcl_walk(func, arg, ip_stack)
1750Sstevel@tonic-gate *
1760Sstevel@tonic-gate * Apply 'func' to every connection available. The 'func' is called as
1770Sstevel@tonic-gate * (*func)(connp, arg). The walk is non-atomic so connections may be
1780Sstevel@tonic-gate * created and destroyed during the walk. The CONN_CONDEMNED and
1790Sstevel@tonic-gate * CONN_INCIPIENT flags ensure that connections which are newly created
1800Sstevel@tonic-gate * or being destroyed are not selected by the walker.
1810Sstevel@tonic-gate *
1820Sstevel@tonic-gate * Table Updates
1830Sstevel@tonic-gate * -------------
1840Sstevel@tonic-gate *
18511042SErik.Nordmark@Sun.COM * int ipcl_conn_insert(connp);
18611042SErik.Nordmark@Sun.COM * int ipcl_conn_insert_v4(connp);
18711042SErik.Nordmark@Sun.COM * int ipcl_conn_insert_v6(connp);
1880Sstevel@tonic-gate *
1890Sstevel@tonic-gate * Insert 'connp' in the ipcl_conn_fanout.
 *		Arguments :
1910Sstevel@tonic-gate * connp conn_t to be inserted
1920Sstevel@tonic-gate *
1930Sstevel@tonic-gate * Return value :
1940Sstevel@tonic-gate * 0 if connp was inserted
1950Sstevel@tonic-gate * EADDRINUSE if the connection with the same tuple
1960Sstevel@tonic-gate * already exists.
1970Sstevel@tonic-gate *
19811042SErik.Nordmark@Sun.COM * int ipcl_bind_insert(connp);
19911042SErik.Nordmark@Sun.COM * int ipcl_bind_insert_v4(connp);
20011042SErik.Nordmark@Sun.COM * int ipcl_bind_insert_v6(connp);
2010Sstevel@tonic-gate *
2020Sstevel@tonic-gate * Insert 'connp' in ipcl_bind_fanout.
 *		Arguments :
2040Sstevel@tonic-gate * connp conn_t to be inserted
2050Sstevel@tonic-gate *
2060Sstevel@tonic-gate *
2070Sstevel@tonic-gate * void ipcl_hash_remove(connp);
2080Sstevel@tonic-gate *
2090Sstevel@tonic-gate * Removes the 'connp' from the connection fanout table.
2100Sstevel@tonic-gate *
2110Sstevel@tonic-gate * Connection Creation/Destruction
2120Sstevel@tonic-gate * -------------------------------
2130Sstevel@tonic-gate *
2143448Sdh155122 * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
2150Sstevel@tonic-gate *
2160Sstevel@tonic-gate * Creates a new conn based on the type flag, inserts it into
2170Sstevel@tonic-gate * globalhash table.
2180Sstevel@tonic-gate *
2190Sstevel@tonic-gate * type: This flag determines the type of conn_t which needs to be
2205240Snordmark * created i.e., which kmem_cache it comes from.
2210Sstevel@tonic-gate * IPCL_TCPCONN indicates a TCP connection
2225240Snordmark * IPCL_SCTPCONN indicates a SCTP connection
2235240Snordmark * IPCL_UDPCONN indicates a UDP conn_t.
2245240Snordmark * IPCL_RAWIPCONN indicates a RAWIP/ICMP conn_t.
2255240Snordmark * IPCL_RTSCONN indicates a RTS conn_t.
2265240Snordmark * IPCL_IPCCONN indicates all other connections.
2270Sstevel@tonic-gate *
2280Sstevel@tonic-gate * void ipcl_conn_destroy(connp)
2290Sstevel@tonic-gate *
2300Sstevel@tonic-gate * Destroys the connection state, removes it from the global
2310Sstevel@tonic-gate * connection hash table and frees its memory.
2320Sstevel@tonic-gate */
2330Sstevel@tonic-gate
2340Sstevel@tonic-gate #include <sys/types.h>
2350Sstevel@tonic-gate #include <sys/stream.h>
2360Sstevel@tonic-gate #include <sys/stropts.h>
2370Sstevel@tonic-gate #include <sys/sysmacros.h>
2380Sstevel@tonic-gate #include <sys/strsubr.h>
2390Sstevel@tonic-gate #include <sys/strsun.h>
2400Sstevel@tonic-gate #define _SUN_TPI_VERSION 2
2410Sstevel@tonic-gate #include <sys/ddi.h>
2420Sstevel@tonic-gate #include <sys/cmn_err.h>
2430Sstevel@tonic-gate #include <sys/debug.h>
2440Sstevel@tonic-gate
2450Sstevel@tonic-gate #include <sys/systm.h>
2460Sstevel@tonic-gate #include <sys/param.h>
2470Sstevel@tonic-gate #include <sys/kmem.h>
2480Sstevel@tonic-gate #include <sys/isa_defs.h>
2490Sstevel@tonic-gate #include <inet/common.h>
2500Sstevel@tonic-gate #include <netinet/ip6.h>
2510Sstevel@tonic-gate #include <netinet/icmp6.h>
2520Sstevel@tonic-gate
2530Sstevel@tonic-gate #include <inet/ip.h>
25411042SErik.Nordmark@Sun.COM #include <inet/ip_if.h>
25511042SErik.Nordmark@Sun.COM #include <inet/ip_ire.h>
2560Sstevel@tonic-gate #include <inet/ip6.h>
2570Sstevel@tonic-gate #include <inet/ip_ndp.h>
2588348SEric.Yu@Sun.COM #include <inet/ip_impl.h>
259741Smasputra #include <inet/udp_impl.h>
2600Sstevel@tonic-gate #include <inet/sctp_ip.h>
2613448Sdh155122 #include <inet/sctp/sctp_impl.h>
2625240Snordmark #include <inet/rawip_impl.h>
2635240Snordmark #include <inet/rts_impl.h>
26410616SSebastien.Roy@Sun.COM #include <inet/iptun/iptun_impl.h>
2650Sstevel@tonic-gate
2660Sstevel@tonic-gate #include <sys/cpuvar.h>
2670Sstevel@tonic-gate
2680Sstevel@tonic-gate #include <inet/ipclassifier.h>
2698348SEric.Yu@Sun.COM #include <inet/tcp.h>
2700Sstevel@tonic-gate #include <inet/ipsec_impl.h>
2710Sstevel@tonic-gate
2721676Sjpk #include <sys/tsol/tnet.h>
2738348SEric.Yu@Sun.COM #include <sys/sockio.h>
2741676Sjpk
/*
 * Old value for compatibility. Settable in /etc/system.
 * ipcl_init() consults it only when ipcl_conn_hash_size is zero.
 */
uint_t tcp_conn_hash_size = 0;

/*
 * New value. Zero means choose automatically. Settable in /etc/system.
 * When both this and tcp_conn_hash_size are zero, ipcl_init() sizes the
 * conn fanout from free memory divided by ipcl_conn_hash_memfactor, limited
 * to ipcl_conn_hash_maxsize. In every case ipcl_init() then replaces the
 * requested size with a prime taken from the P2Ps() table.
 */
uint_t ipcl_conn_hash_size = 0;
uint_t ipcl_conn_hash_memfactor = 8192;
uint_t ipcl_conn_hash_maxsize = 82500;

/* bind/udp fanout table size */
uint_t ipcl_bind_fanout_size = 512;
uint_t ipcl_udp_fanout_size = 16384;

/* Raw socket fanout size. Must be a power of 2. */
uint_t ipcl_raw_fanout_size = 256;

/*
 * The IPCL_IPTUN_HASH() function works best with a prime table size. We
 * expect that most large deployments would have hundreds of tunnels, and
 * thousands in the extreme case.
 */
uint_t ipcl_iptun_fanout_size = 6143;
29610616SSebastien.Roy@Sun.COM
/*
 * Power of 2^N Primes useful for hashing for N of 0-28,
 * these primes are the nearest prime <= 2^N - 2^(N-2).
 *
 * The macro expands to a brace-enclosed array initializer whose entry at
 * index N is the prime for 2^N. Entries 0-2 are 0 placeholders, and the
 * trailing 0 (index 29) is the "out of range" marker that ipcl_init()
 * tests for after scanning the table.
 */

#define	P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,	\
		6143, 12281, 24571, 49139, 98299, 196597, 393209,	\
		786431, 1572853, 3145721, 6291449, 12582893, 25165813,	\
		50331599, 100663291, 201326557, 0}
3060Sstevel@tonic-gate
/*
 * wrapper structure to ensure that conn and what follows it (tcp_t, etc)
 * are aligned on cache lines.
 *
 * The per-protocol kmem caches created in ipcl_g_init() are sized as
 * sizeof (itc_t) plus the size of the protocol structure, so the protocol
 * structure starts immediately after this union.
 */
typedef union itc_s {
	conn_t	itc_conn;	/* the connection state itself */
	/*
	 * Pads the union out to CACHE_ALIGN(conn_s) bytes.
	 * NOTE(review): CACHE_ALIGN is defined outside this file; presumably
	 * it rounds the size of struct conn_s up to a cache-line multiple --
	 * confirm against its definition.
	 */
	char	itcu_filler[CACHE_ALIGN(conn_s)];
} itc_t;
3150Sstevel@tonic-gate
/*
 * kmem caches that ipcl_conn_create() allocates conn_t structures from,
 * one per connection type. All but sctp_conn_cache are created in
 * ipcl_g_init() and destroyed in ipcl_g_destroy(); sctp_conn_cache is
 * only declared here and is defined elsewhere.
 */
struct kmem_cache  *tcp_conn_cache;
struct kmem_cache  *ip_conn_cache;
extern struct kmem_cache  *sctp_conn_cache;
struct kmem_cache  *udp_conn_cache;
struct kmem_cache  *rawip_conn_cache;
struct kmem_cache  *rts_conn_cache;

/* TCP timer mblk helpers, defined outside this file. */
extern void	tcp_timermp_free(tcp_t *);
extern mblk_t	*tcp_timermp_alloc(int);

/*
 * Constructor/destructor pairs handed to kmem_cache_create() in
 * ipcl_g_init(), one pair per cache defined in this file.
 */
static int	ip_conn_constructor(void *, void *, int);
static void	ip_conn_destructor(void *, void *);

static int	tcp_conn_constructor(void *, void *, int);
static void	tcp_conn_destructor(void *, void *);

static int	udp_conn_constructor(void *, void *, int);
static void	udp_conn_destructor(void *, void *);

static int	rawip_conn_constructor(void *, void *, int);
static void	rawip_conn_destructor(void *, void *);

static int	rts_conn_constructor(void *, void *, int);
static void	rts_conn_destructor(void *, void *);
3400Sstevel@tonic-gate
3410Sstevel@tonic-gate /*
3423448Sdh155122 * Global (for all stack instances) init routine
3430Sstevel@tonic-gate */
3440Sstevel@tonic-gate void
ipcl_g_init(void)3453448Sdh155122 ipcl_g_init(void)
3460Sstevel@tonic-gate {
3475240Snordmark ip_conn_cache = kmem_cache_create("ip_conn_cache",
3480Sstevel@tonic-gate sizeof (conn_t), CACHE_ALIGN_SIZE,
3495240Snordmark ip_conn_constructor, ip_conn_destructor,
3505240Snordmark NULL, NULL, NULL, 0);
3515240Snordmark
3525240Snordmark tcp_conn_cache = kmem_cache_create("tcp_conn_cache",
3535240Snordmark sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE,
3545240Snordmark tcp_conn_constructor, tcp_conn_destructor,
35511303SKacheong.Poon@Sun.COM tcp_conn_reclaim, NULL, NULL, 0);
3560Sstevel@tonic-gate
3575240Snordmark udp_conn_cache = kmem_cache_create("udp_conn_cache",
3585240Snordmark sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE,
3595240Snordmark udp_conn_constructor, udp_conn_destructor,
3605240Snordmark NULL, NULL, NULL, 0);
3615240Snordmark
3625240Snordmark rawip_conn_cache = kmem_cache_create("rawip_conn_cache",
3635240Snordmark sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE,
3645240Snordmark rawip_conn_constructor, rawip_conn_destructor,
3655240Snordmark NULL, NULL, NULL, 0);
3665240Snordmark
3675240Snordmark rts_conn_cache = kmem_cache_create("rts_conn_cache",
3685240Snordmark sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE,
3695240Snordmark rts_conn_constructor, rts_conn_destructor,
3700Sstevel@tonic-gate NULL, NULL, NULL, 0);
3713448Sdh155122 }
3723448Sdh155122
3733448Sdh155122 /*
3743448Sdh155122 * ipclassifier intialization routine, sets up hash tables.
3753448Sdh155122 */
3763448Sdh155122 void
ipcl_init(ip_stack_t * ipst)3773448Sdh155122 ipcl_init(ip_stack_t *ipst)
3783448Sdh155122 {
3793448Sdh155122 int i;
3803448Sdh155122 int sizes[] = P2Ps();
3810Sstevel@tonic-gate
3820Sstevel@tonic-gate /*
3833448Sdh155122 * Calculate size of conn fanout table from /etc/system settings
3840Sstevel@tonic-gate */
3850Sstevel@tonic-gate if (ipcl_conn_hash_size != 0) {
3863448Sdh155122 ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size;
3870Sstevel@tonic-gate } else if (tcp_conn_hash_size != 0) {
3883448Sdh155122 ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size;
3890Sstevel@tonic-gate } else {
3900Sstevel@tonic-gate extern pgcnt_t freemem;
3910Sstevel@tonic-gate
3923448Sdh155122 ipst->ips_ipcl_conn_fanout_size =
3930Sstevel@tonic-gate (freemem * PAGESIZE) / ipcl_conn_hash_memfactor;
3940Sstevel@tonic-gate
3953448Sdh155122 if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) {
3963448Sdh155122 ipst->ips_ipcl_conn_fanout_size =
3973448Sdh155122 ipcl_conn_hash_maxsize;
3983448Sdh155122 }
3990Sstevel@tonic-gate }
4000Sstevel@tonic-gate
4010Sstevel@tonic-gate for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) {
4023448Sdh155122 if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) {
4030Sstevel@tonic-gate break;
4040Sstevel@tonic-gate }
4050Sstevel@tonic-gate }
4063448Sdh155122 if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) {
4070Sstevel@tonic-gate /* Out of range, use the 2^16 value */
4083448Sdh155122 ipst->ips_ipcl_conn_fanout_size = sizes[16];
4090Sstevel@tonic-gate }
4103448Sdh155122
4113448Sdh155122 /* Take values from /etc/system */
4123448Sdh155122 ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size;
4133448Sdh155122 ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size;
4143448Sdh155122 ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size;
41510616SSebastien.Roy@Sun.COM ipst->ips_ipcl_iptun_fanout_size = ipcl_iptun_fanout_size;
4160Sstevel@tonic-gate
4173448Sdh155122 ASSERT(ipst->ips_ipcl_conn_fanout == NULL);
4183448Sdh155122
4193448Sdh155122 ipst->ips_ipcl_conn_fanout = kmem_zalloc(
4203448Sdh155122 ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP);
4213448Sdh155122
4223448Sdh155122 for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
4233448Sdh155122 mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL,
4240Sstevel@tonic-gate MUTEX_DEFAULT, NULL);
4250Sstevel@tonic-gate }
4260Sstevel@tonic-gate
4273448Sdh155122 ipst->ips_ipcl_bind_fanout = kmem_zalloc(
4283448Sdh155122 ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP);
4290Sstevel@tonic-gate
4303448Sdh155122 for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
4313448Sdh155122 mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL,
4320Sstevel@tonic-gate MUTEX_DEFAULT, NULL);
4330Sstevel@tonic-gate }
4340Sstevel@tonic-gate
43511042SErik.Nordmark@Sun.COM ipst->ips_ipcl_proto_fanout_v4 = kmem_zalloc(IPPROTO_MAX *
4363448Sdh155122 sizeof (connf_t), KM_SLEEP);
4373448Sdh155122 for (i = 0; i < IPPROTO_MAX; i++) {
43811042SErik.Nordmark@Sun.COM mutex_init(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock, NULL,
4390Sstevel@tonic-gate MUTEX_DEFAULT, NULL);
4400Sstevel@tonic-gate }
4413448Sdh155122
4423448Sdh155122 ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX *
4433448Sdh155122 sizeof (connf_t), KM_SLEEP);
4443448Sdh155122 for (i = 0; i < IPPROTO_MAX; i++) {
4453448Sdh155122 mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL,
4460Sstevel@tonic-gate MUTEX_DEFAULT, NULL);
4470Sstevel@tonic-gate }
4480Sstevel@tonic-gate
4493448Sdh155122 ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP);
4503448Sdh155122 mutex_init(&ipst->ips_rts_clients->connf_lock,
4513448Sdh155122 NULL, MUTEX_DEFAULT, NULL);
4520Sstevel@tonic-gate
4533448Sdh155122 ipst->ips_ipcl_udp_fanout = kmem_zalloc(
4543448Sdh155122 ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP);
4553448Sdh155122 for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
4563448Sdh155122 mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL,
4570Sstevel@tonic-gate MUTEX_DEFAULT, NULL);
4580Sstevel@tonic-gate }
4590Sstevel@tonic-gate
46010616SSebastien.Roy@Sun.COM ipst->ips_ipcl_iptun_fanout = kmem_zalloc(
46110616SSebastien.Roy@Sun.COM ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t), KM_SLEEP);
46210616SSebastien.Roy@Sun.COM for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
46310616SSebastien.Roy@Sun.COM mutex_init(&ipst->ips_ipcl_iptun_fanout[i].connf_lock, NULL,
46410616SSebastien.Roy@Sun.COM MUTEX_DEFAULT, NULL);
46510616SSebastien.Roy@Sun.COM }
46610616SSebastien.Roy@Sun.COM
4673448Sdh155122 ipst->ips_ipcl_raw_fanout = kmem_zalloc(
4683448Sdh155122 ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP);
4693448Sdh155122 for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
4703448Sdh155122 mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL,
4710Sstevel@tonic-gate MUTEX_DEFAULT, NULL);
4720Sstevel@tonic-gate }
4730Sstevel@tonic-gate
4743448Sdh155122 ipst->ips_ipcl_globalhash_fanout = kmem_zalloc(
4753448Sdh155122 sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP);
4760Sstevel@tonic-gate for (i = 0; i < CONN_G_HASH_SIZE; i++) {
4773448Sdh155122 mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock,
4783448Sdh155122 NULL, MUTEX_DEFAULT, NULL);
4790Sstevel@tonic-gate }
4800Sstevel@tonic-gate }
4810Sstevel@tonic-gate
4820Sstevel@tonic-gate void
ipcl_g_destroy(void)4833448Sdh155122 ipcl_g_destroy(void)
4840Sstevel@tonic-gate {
4855240Snordmark kmem_cache_destroy(ip_conn_cache);
4865240Snordmark kmem_cache_destroy(tcp_conn_cache);
4875240Snordmark kmem_cache_destroy(udp_conn_cache);
4885240Snordmark kmem_cache_destroy(rawip_conn_cache);
4895240Snordmark kmem_cache_destroy(rts_conn_cache);
4903448Sdh155122 }
4913448Sdh155122
4923448Sdh155122 /*
4933448Sdh155122 * All user-level and kernel use of the stack must be gone
4943448Sdh155122 * by now.
4953448Sdh155122 */
4963448Sdh155122 void
ipcl_destroy(ip_stack_t * ipst)4973448Sdh155122 ipcl_destroy(ip_stack_t *ipst)
4983448Sdh155122 {
4993448Sdh155122 int i;
5003448Sdh155122
5013448Sdh155122 for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
5023448Sdh155122 ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL);
5033448Sdh155122 mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock);
5043448Sdh155122 }
5053448Sdh155122 kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size *
5063448Sdh155122 sizeof (connf_t));
5073448Sdh155122 ipst->ips_ipcl_conn_fanout = NULL;
5083448Sdh155122
5093448Sdh155122 for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
5103448Sdh155122 ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL);
5113448Sdh155122 mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock);
5123448Sdh155122 }
5133448Sdh155122 kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size *
5143448Sdh155122 sizeof (connf_t));
5153448Sdh155122 ipst->ips_ipcl_bind_fanout = NULL;
5163448Sdh155122
5173448Sdh155122 for (i = 0; i < IPPROTO_MAX; i++) {
51811042SErik.Nordmark@Sun.COM ASSERT(ipst->ips_ipcl_proto_fanout_v4[i].connf_head == NULL);
51911042SErik.Nordmark@Sun.COM mutex_destroy(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock);
5203448Sdh155122 }
52111042SErik.Nordmark@Sun.COM kmem_free(ipst->ips_ipcl_proto_fanout_v4,
52211042SErik.Nordmark@Sun.COM IPPROTO_MAX * sizeof (connf_t));
52311042SErik.Nordmark@Sun.COM ipst->ips_ipcl_proto_fanout_v4 = NULL;
5240Sstevel@tonic-gate
5253448Sdh155122 for (i = 0; i < IPPROTO_MAX; i++) {
5263448Sdh155122 ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL);
5273448Sdh155122 mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock);
5283448Sdh155122 }
5293448Sdh155122 kmem_free(ipst->ips_ipcl_proto_fanout_v6,
5303448Sdh155122 IPPROTO_MAX * sizeof (connf_t));
5313448Sdh155122 ipst->ips_ipcl_proto_fanout_v6 = NULL;
5323448Sdh155122
5333448Sdh155122 for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
5343448Sdh155122 ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL);
5353448Sdh155122 mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock);
5363448Sdh155122 }
5373448Sdh155122 kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size *
5383448Sdh155122 sizeof (connf_t));
5393448Sdh155122 ipst->ips_ipcl_udp_fanout = NULL;
5400Sstevel@tonic-gate
54110616SSebastien.Roy@Sun.COM for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
54210616SSebastien.Roy@Sun.COM ASSERT(ipst->ips_ipcl_iptun_fanout[i].connf_head == NULL);
54310616SSebastien.Roy@Sun.COM mutex_destroy(&ipst->ips_ipcl_iptun_fanout[i].connf_lock);
54410616SSebastien.Roy@Sun.COM }
54510616SSebastien.Roy@Sun.COM kmem_free(ipst->ips_ipcl_iptun_fanout,
54610616SSebastien.Roy@Sun.COM ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t));
54710616SSebastien.Roy@Sun.COM ipst->ips_ipcl_iptun_fanout = NULL;
54810616SSebastien.Roy@Sun.COM
5493448Sdh155122 for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
5503448Sdh155122 ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL);
5513448Sdh155122 mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock);
5523448Sdh155122 }
5533448Sdh155122 kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size *
5543448Sdh155122 sizeof (connf_t));
5553448Sdh155122 ipst->ips_ipcl_raw_fanout = NULL;
5560Sstevel@tonic-gate
5573448Sdh155122 for (i = 0; i < CONN_G_HASH_SIZE; i++) {
5583448Sdh155122 ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL);
5593448Sdh155122 mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
5603448Sdh155122 }
5613448Sdh155122 kmem_free(ipst->ips_ipcl_globalhash_fanout,
5623448Sdh155122 sizeof (connf_t) * CONN_G_HASH_SIZE);
5633448Sdh155122 ipst->ips_ipcl_globalhash_fanout = NULL;
5640Sstevel@tonic-gate
5653448Sdh155122 ASSERT(ipst->ips_rts_clients->connf_head == NULL);
5663448Sdh155122 mutex_destroy(&ipst->ips_rts_clients->connf_lock);
5673448Sdh155122 kmem_free(ipst->ips_rts_clients, sizeof (connf_t));
5683448Sdh155122 ipst->ips_rts_clients = NULL;
5690Sstevel@tonic-gate }
5700Sstevel@tonic-gate
5710Sstevel@tonic-gate /*
5720Sstevel@tonic-gate * conn creation routine. initialize the conn, sets the reference
5730Sstevel@tonic-gate * and inserts it in the global hash table.
5740Sstevel@tonic-gate */
5750Sstevel@tonic-gate conn_t *
ipcl_conn_create(uint32_t type,int sleep,netstack_t * ns)5763448Sdh155122 ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns)
5770Sstevel@tonic-gate {
5780Sstevel@tonic-gate conn_t *connp;
5795240Snordmark struct kmem_cache *conn_cache;
5800Sstevel@tonic-gate
5810Sstevel@tonic-gate switch (type) {
5820Sstevel@tonic-gate case IPCL_SCTPCONN:
5830Sstevel@tonic-gate if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL)
5840Sstevel@tonic-gate return (NULL);
5854691Skcpoon sctp_conn_init(connp);
5863448Sdh155122 netstack_hold(ns);
5873448Sdh155122 connp->conn_netstack = ns;
58811042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_ipst = ns->netstack_ip;
58912507SAlan.Maguire@Sun.COM connp->conn_ixa->ixa_conn_id = (long)connp;
59011042SErik.Nordmark@Sun.COM ipcl_globalhash_insert(connp);
5915240Snordmark return (connp);
5925240Snordmark
5935240Snordmark case IPCL_TCPCONN:
5945240Snordmark conn_cache = tcp_conn_cache;
5950Sstevel@tonic-gate break;
5965240Snordmark
5975240Snordmark case IPCL_UDPCONN:
5985240Snordmark conn_cache = udp_conn_cache;
5995240Snordmark break;
6005240Snordmark
6015240Snordmark case IPCL_RAWIPCONN:
6025240Snordmark conn_cache = rawip_conn_cache;
6035240Snordmark break;
6045240Snordmark
6055240Snordmark case IPCL_RTSCONN:
6065240Snordmark conn_cache = rts_conn_cache;
6075240Snordmark break;
6085240Snordmark
6090Sstevel@tonic-gate case IPCL_IPCCONN:
6105240Snordmark conn_cache = ip_conn_cache;
6110Sstevel@tonic-gate break;
6125240Snordmark
613741Smasputra default:
614741Smasputra connp = NULL;
615741Smasputra ASSERT(0);
6160Sstevel@tonic-gate }
6170Sstevel@tonic-gate
6185240Snordmark if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL)
6195240Snordmark return (NULL);
6205240Snordmark
6215240Snordmark connp->conn_ref = 1;
6225240Snordmark netstack_hold(ns);
6235240Snordmark connp->conn_netstack = ns;
62411042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_ipst = ns->netstack_ip;
62512507SAlan.Maguire@Sun.COM connp->conn_ixa->ixa_conn_id = (long)connp;
6265240Snordmark ipcl_globalhash_insert(connp);
6270Sstevel@tonic-gate return (connp);
6280Sstevel@tonic-gate }
6290Sstevel@tonic-gate
6300Sstevel@tonic-gate void
ipcl_conn_destroy(conn_t * connp)6310Sstevel@tonic-gate ipcl_conn_destroy(conn_t *connp)
6320Sstevel@tonic-gate {
6330Sstevel@tonic-gate mblk_t *mp;
6343448Sdh155122 netstack_t *ns = connp->conn_netstack;
6350Sstevel@tonic-gate
6360Sstevel@tonic-gate ASSERT(!MUTEX_HELD(&connp->conn_lock));
6370Sstevel@tonic-gate ASSERT(connp->conn_ref == 0);
638*12670SRamesh.K@Sun.COM ASSERT(connp->conn_ioctlref == 0);
6390Sstevel@tonic-gate
6407502Saruna@cs.umn.edu DTRACE_PROBE1(conn__destroy, conn_t *, connp);
6417502Saruna@cs.umn.edu
6421676Sjpk if (connp->conn_cred != NULL) {
6431676Sjpk crfree(connp->conn_cred);
6441676Sjpk connp->conn_cred = NULL;
64511680SErik.Nordmark@Sun.COM /* ixa_cred done in ipcl_conn_cleanup below */
6461676Sjpk }
6471676Sjpk
64811042SErik.Nordmark@Sun.COM if (connp->conn_ht_iphc != NULL) {
64911042SErik.Nordmark@Sun.COM kmem_free(connp->conn_ht_iphc, connp->conn_ht_iphc_allocated);
65011042SErik.Nordmark@Sun.COM connp->conn_ht_iphc = NULL;
65111042SErik.Nordmark@Sun.COM connp->conn_ht_iphc_allocated = 0;
65211042SErik.Nordmark@Sun.COM connp->conn_ht_iphc_len = 0;
65311042SErik.Nordmark@Sun.COM connp->conn_ht_ulp = NULL;
65411042SErik.Nordmark@Sun.COM connp->conn_ht_ulp_len = 0;
65511042SErik.Nordmark@Sun.COM }
65611042SErik.Nordmark@Sun.COM ip_pkt_free(&connp->conn_xmit_ipp);
65711042SErik.Nordmark@Sun.COM
6580Sstevel@tonic-gate ipcl_globalhash_remove(connp);
6590Sstevel@tonic-gate
66011042SErik.Nordmark@Sun.COM if (connp->conn_latch != NULL) {
66111042SErik.Nordmark@Sun.COM IPLATCH_REFRELE(connp->conn_latch);
66211042SErik.Nordmark@Sun.COM connp->conn_latch = NULL;
66311042SErik.Nordmark@Sun.COM }
66411042SErik.Nordmark@Sun.COM if (connp->conn_latch_in_policy != NULL) {
66511042SErik.Nordmark@Sun.COM IPPOL_REFRELE(connp->conn_latch_in_policy);
66611042SErik.Nordmark@Sun.COM connp->conn_latch_in_policy = NULL;
66711042SErik.Nordmark@Sun.COM }
66811042SErik.Nordmark@Sun.COM if (connp->conn_latch_in_action != NULL) {
66911042SErik.Nordmark@Sun.COM IPACT_REFRELE(connp->conn_latch_in_action);
67011042SErik.Nordmark@Sun.COM connp->conn_latch_in_action = NULL;
67111042SErik.Nordmark@Sun.COM }
67211042SErik.Nordmark@Sun.COM if (connp->conn_policy != NULL) {
67311042SErik.Nordmark@Sun.COM IPPH_REFRELE(connp->conn_policy, ns);
67411042SErik.Nordmark@Sun.COM connp->conn_policy = NULL;
67511042SErik.Nordmark@Sun.COM }
6763448Sdh155122
67711042SErik.Nordmark@Sun.COM if (connp->conn_ipsec_opt_mp != NULL) {
67811042SErik.Nordmark@Sun.COM freemsg(connp->conn_ipsec_opt_mp);
67911042SErik.Nordmark@Sun.COM connp->conn_ipsec_opt_mp = NULL;
68011042SErik.Nordmark@Sun.COM }
68111042SErik.Nordmark@Sun.COM
68211042SErik.Nordmark@Sun.COM if (connp->conn_flags & IPCL_TCPCONN) {
68311042SErik.Nordmark@Sun.COM tcp_t *tcp = connp->conn_tcp;
684741Smasputra
6850Sstevel@tonic-gate tcp_free(tcp);
6860Sstevel@tonic-gate mp = tcp->tcp_timercache;
68711042SErik.Nordmark@Sun.COM
68811042SErik.Nordmark@Sun.COM tcp->tcp_tcps = NULL;
6890Sstevel@tonic-gate
6908014SKacheong.Poon@Sun.COM /*
6918014SKacheong.Poon@Sun.COM * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate
6928014SKacheong.Poon@Sun.COM * the mblk.
6938014SKacheong.Poon@Sun.COM */
6948014SKacheong.Poon@Sun.COM if (tcp->tcp_rsrv_mp != NULL) {
6958014SKacheong.Poon@Sun.COM freeb(tcp->tcp_rsrv_mp);
6968014SKacheong.Poon@Sun.COM tcp->tcp_rsrv_mp = NULL;
6978014SKacheong.Poon@Sun.COM mutex_destroy(&tcp->tcp_rsrv_mp_lock);
6988014SKacheong.Poon@Sun.COM }
6998014SKacheong.Poon@Sun.COM
70011042SErik.Nordmark@Sun.COM ipcl_conn_cleanup(connp);
70111042SErik.Nordmark@Sun.COM connp->conn_flags = IPCL_TCPCONN;
7023448Sdh155122 if (ns != NULL) {
7033448Sdh155122 ASSERT(tcp->tcp_tcps == NULL);
7043448Sdh155122 connp->conn_netstack = NULL;
70511042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_ipst = NULL;
7063448Sdh155122 netstack_rele(ns);
7073448Sdh155122 }
7085240Snordmark
7095240Snordmark bzero(tcp, sizeof (tcp_t));
7105240Snordmark
7115240Snordmark tcp->tcp_timercache = mp;
7125240Snordmark tcp->tcp_connp = connp;
7135240Snordmark kmem_cache_free(tcp_conn_cache, connp);
7145240Snordmark return;
7155240Snordmark }
7165240Snordmark
7175240Snordmark if (connp->conn_flags & IPCL_SCTPCONN) {
7183448Sdh155122 ASSERT(ns != NULL);
7190Sstevel@tonic-gate sctp_free(connp);
7205240Snordmark return;
7215240Snordmark }
7225240Snordmark
72311042SErik.Nordmark@Sun.COM ipcl_conn_cleanup(connp);
7245240Snordmark if (ns != NULL) {
7255240Snordmark connp->conn_netstack = NULL;
72611042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_ipst = NULL;
7275240Snordmark netstack_rele(ns);
7285240Snordmark }
7298348SEric.Yu@Sun.COM
7305240Snordmark /* leave conn_priv aka conn_udp, conn_icmp, etc in place. */
7315240Snordmark if (connp->conn_flags & IPCL_UDPCONN) {
7325240Snordmark connp->conn_flags = IPCL_UDPCONN;
7335240Snordmark kmem_cache_free(udp_conn_cache, connp);
7345240Snordmark } else if (connp->conn_flags & IPCL_RAWIPCONN) {
7355240Snordmark connp->conn_flags = IPCL_RAWIPCONN;
73611042SErik.Nordmark@Sun.COM connp->conn_proto = IPPROTO_ICMP;
73711042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_protocol = connp->conn_proto;
7385240Snordmark kmem_cache_free(rawip_conn_cache, connp);
7395240Snordmark } else if (connp->conn_flags & IPCL_RTSCONN) {
7405240Snordmark connp->conn_flags = IPCL_RTSCONN;
7415240Snordmark kmem_cache_free(rts_conn_cache, connp);
7420Sstevel@tonic-gate } else {
7435240Snordmark connp->conn_flags = IPCL_IPCCONN;
7445240Snordmark ASSERT(connp->conn_flags & IPCL_IPCCONN);
7455240Snordmark ASSERT(connp->conn_priv == NULL);
7465240Snordmark kmem_cache_free(ip_conn_cache, connp);
7470Sstevel@tonic-gate }
7480Sstevel@tonic-gate }
7490Sstevel@tonic-gate
7500Sstevel@tonic-gate /*
7510Sstevel@tonic-gate * Running in cluster mode - deregister listener information
7520Sstevel@tonic-gate */
7530Sstevel@tonic-gate static void
ipcl_conn_unlisten(conn_t * connp)7540Sstevel@tonic-gate ipcl_conn_unlisten(conn_t *connp)
7550Sstevel@tonic-gate {
7560Sstevel@tonic-gate ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0);
7570Sstevel@tonic-gate ASSERT(connp->conn_lport != 0);
7580Sstevel@tonic-gate
7590Sstevel@tonic-gate if (cl_inet_unlisten != NULL) {
7600Sstevel@tonic-gate sa_family_t addr_family;
7610Sstevel@tonic-gate uint8_t *laddrp;
7620Sstevel@tonic-gate
76311042SErik.Nordmark@Sun.COM if (connp->conn_ipversion == IPV6_VERSION) {
7640Sstevel@tonic-gate addr_family = AF_INET6;
76511042SErik.Nordmark@Sun.COM laddrp = (uint8_t *)&connp->conn_bound_addr_v6;
7660Sstevel@tonic-gate } else {
7670Sstevel@tonic-gate addr_family = AF_INET;
76811042SErik.Nordmark@Sun.COM laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
7690Sstevel@tonic-gate }
7708392SHuafeng.Lv@Sun.COM (*cl_inet_unlisten)(connp->conn_netstack->netstack_stackid,
7718392SHuafeng.Lv@Sun.COM IPPROTO_TCP, addr_family, laddrp, connp->conn_lport, NULL);
7720Sstevel@tonic-gate }
7730Sstevel@tonic-gate connp->conn_flags &= ~IPCL_CL_LISTENER;
7740Sstevel@tonic-gate }
7750Sstevel@tonic-gate
/*
 * Unlink connp from the fanout list recorded in conn_fanout, if any, and
 * drop the reference the list held on it.  The caller must not hold
 * conn_lock; the fanout's connf_lock is taken for the duration.
 *
 * IPCL_REMOVED is set rather than clearing the flag that says which table
 * the conn was in, so that the previous table can still be identified when
 * debugging.  A conn flagged IPCL_CL_LISTENER is also deregistered through
 * ipcl_conn_unlisten().
 */
#define	IPCL_HASH_REMOVE(connp)	{					\
	connf_t	*rm_connfp = (connp)->conn_fanout;			\
									\
	ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));			\
	if (rm_connfp != NULL) {					\
		mutex_enter(&rm_connfp->connf_lock);			\
		if ((connp)->conn_next != NULL) {			\
			(connp)->conn_next->conn_prev =			\
			    (connp)->conn_prev;				\
		}							\
		if ((connp)->conn_prev != NULL) {			\
			(connp)->conn_prev->conn_next =			\
			    (connp)->conn_next;				\
		} else {						\
			/* connp was the first entry */			\
			rm_connfp->connf_head = (connp)->conn_next;	\
		}							\
		(connp)->conn_fanout = NULL;				\
		(connp)->conn_next = NULL;				\
		(connp)->conn_prev = NULL;				\
		(connp)->conn_flags |= IPCL_REMOVED;			\
		if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0) {	\
			ipcl_conn_unlisten((connp));			\
		}							\
		CONN_DEC_REF((connp));					\
		mutex_exit(&rm_connfp->connf_lock);			\
	}								\
}
8040Sstevel@tonic-gate
8050Sstevel@tonic-gate void
ipcl_hash_remove(conn_t * connp)8060Sstevel@tonic-gate ipcl_hash_remove(conn_t *connp)
8070Sstevel@tonic-gate {
80811042SErik.Nordmark@Sun.COM uint8_t protocol = connp->conn_proto;
80911042SErik.Nordmark@Sun.COM
8100Sstevel@tonic-gate IPCL_HASH_REMOVE(connp);
81111042SErik.Nordmark@Sun.COM if (protocol == IPPROTO_RSVP)
81211042SErik.Nordmark@Sun.COM ill_set_inputfn_all(connp->conn_netstack->netstack_ip);
8130Sstevel@tonic-gate }
8140Sstevel@tonic-gate
8150Sstevel@tonic-gate /*
8160Sstevel@tonic-gate * The whole purpose of this function is allow removal of
8170Sstevel@tonic-gate * a conn_t from the connected hash for timewait reclaim.
8180Sstevel@tonic-gate * This is essentially a TW reclaim fastpath where timewait
8190Sstevel@tonic-gate * collector checks under fanout lock (so no one else can
8200Sstevel@tonic-gate * get access to the conn_t) that refcnt is 2 i.e. one for
8210Sstevel@tonic-gate * TCP and one for the classifier hash list. If ref count
8220Sstevel@tonic-gate * is indeed 2, we can just remove the conn under lock and
8230Sstevel@tonic-gate * avoid cleaning up the conn under squeue. This gives us
8240Sstevel@tonic-gate * improved performance.
8250Sstevel@tonic-gate */
8260Sstevel@tonic-gate void
ipcl_hash_remove_locked(conn_t * connp,connf_t * connfp)8270Sstevel@tonic-gate ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp)
8280Sstevel@tonic-gate {
8290Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connfp->connf_lock));
8300Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock));
8310Sstevel@tonic-gate ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0);
8320Sstevel@tonic-gate
8330Sstevel@tonic-gate if ((connp)->conn_next != NULL) {
8344691Skcpoon (connp)->conn_next->conn_prev = (connp)->conn_prev;
8350Sstevel@tonic-gate }
8360Sstevel@tonic-gate if ((connp)->conn_prev != NULL) {
8374691Skcpoon (connp)->conn_prev->conn_next = (connp)->conn_next;
8380Sstevel@tonic-gate } else {
8390Sstevel@tonic-gate connfp->connf_head = (connp)->conn_next;
8400Sstevel@tonic-gate }
8410Sstevel@tonic-gate (connp)->conn_fanout = NULL;
8420Sstevel@tonic-gate (connp)->conn_next = NULL;
8430Sstevel@tonic-gate (connp)->conn_prev = NULL;
8440Sstevel@tonic-gate (connp)->conn_flags |= IPCL_REMOVED;
8450Sstevel@tonic-gate ASSERT((connp)->conn_ref == 2);
8460Sstevel@tonic-gate (connp)->conn_ref--;
8470Sstevel@tonic-gate }
8480Sstevel@tonic-gate
/*
 * Push connp onto the head of connfp's list, mark it IPCL_CONNECTED
 * (clearing IPCL_REMOVED) and take a reference on behalf of the list.
 * connp must not currently be on any fanout list.  This macro does not
 * touch connf_lock itself; its callers in this file hold it.
 */
#define	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) {		\
	ASSERT((connp)->conn_fanout == NULL);				\
	ASSERT((connp)->conn_next == NULL);				\
	ASSERT((connp)->conn_prev == NULL);				\
	if ((connfp)->connf_head != NULL) {				\
		(connp)->conn_next = (connfp)->connf_head;		\
		(connfp)->connf_head->conn_prev = (connp);		\
	}								\
	(connfp)->connf_head = (connp);					\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags =						\
	    ((connp)->conn_flags & ~IPCL_REMOVED) | IPCL_CONNECTED;	\
	CONN_INC_REF(connp);						\
}
8630Sstevel@tonic-gate
/*
 * Move connp to the head of connfp's list as a connected entry: remove it
 * from whatever fanout it is on now, then insert it under connfp's lock.
 */
#define	IPCL_HASH_INSERT_CONNECTED(connfp, connp) {			\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);		\
	mutex_exit(&(connfp)->connf_lock);				\
}
8700Sstevel@tonic-gate
/*
 * Move connp onto connfp's list as a bound entry.  connp is first removed
 * from its current fanout, then linked in immediately before the first
 * existing entry whose local address satisfies _IPCL_V4_MATCH_ANY(), or at
 * the tail when there is no such entry.  The conn is marked IPCL_BOUND
 * (clearing IPCL_REMOVED) and a reference is taken for the list.
 */
#define	IPCL_HASH_INSERT_BOUND(connfp, connp) {				\
	conn_t *pconnp = NULL, *nconnp;					\
									\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	/* pconnp trails nconnp by one entry */				\
	for (nconnp = (connfp)->connf_head;				\
	    nconnp != NULL &&						\
	    !_IPCL_V4_MATCH_ANY(nconnp->conn_laddr_v6);			\
	    nconnp = nconnp->conn_next) {				\
		pconnp = nconnp;					\
	}								\
	if (pconnp == NULL) {						\
		(connfp)->connf_head = (connp);				\
	} else {							\
		pconnp->conn_next = (connp);				\
		(connp)->conn_prev = pconnp;				\
	}								\
	if (nconnp != NULL) {						\
		(connp)->conn_next = nconnp;				\
		nconnp->conn_prev = (connp);				\
	}								\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags =						\
	    ((connp)->conn_flags & ~IPCL_REMOVED) | IPCL_BOUND;		\
	CONN_INC_REF(connp);						\
	mutex_exit(&(connfp)->connf_lock);				\
}
8970Sstevel@tonic-gate
/*
 * Move connp onto connfp's list as a wildcard (no specific local address)
 * entry.  connp is first removed from its current fanout.
 *
 * By default connp is appended at the tail.  The exception: when connp's
 * local address is IPv4-mapped, it is linked in immediately before the
 * first entry in the same zone whose local address is the unspecified IPv6
 * address.  The conn is marked IPCL_BOUND (clearing IPCL_REMOVED) and a
 * reference is taken for the list.
 *
 * "list" always points at the link field that has to be rewritten to
 * splice connp in: either connf_head or the conn_next of the entry before
 * the insertion point.
 */
#define	IPCL_HASH_INSERT_WILDCARD(connfp, connp) {			\
	conn_t **list, *prev, *next;					\
	boolean_t isv4mapped =						\
	    IN6_IS_ADDR_V4MAPPED(&(connp)->conn_laddr_v6);		\
									\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	list = &(connfp)->connf_head;					\
	prev = NULL;							\
	while ((next = *list) != NULL) {				\
		if (isv4mapped &&					\
		    IN6_IS_ADDR_UNSPECIFIED(&next->conn_laddr_v6) &&	\
		    (connp)->conn_zoneid == next->conn_zoneid) {	\
			(connp)->conn_next = next;			\
			/*						\
			 * On a consistent list prev already equals	\
			 * next->conn_prev; the reload is retained	\
			 * unchanged.					\
			 */						\
			if (prev != NULL)				\
				prev = next->conn_prev;			\
			next->conn_prev = (connp);			\
			break;						\
		}							\
		list = &next->conn_next;				\
		prev = next;						\
	}								\
	(connp)->conn_prev = prev;					\
	*list = (connp);						\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags =						\
	    ((connp)->conn_flags & ~IPCL_REMOVED) | IPCL_BOUND;		\
	CONN_INC_REF((connp));						\
	mutex_exit(&(connfp)->connf_lock);				\
}
9270Sstevel@tonic-gate
9280Sstevel@tonic-gate void
ipcl_hash_insert_wildcard(connf_t * connfp,conn_t * connp)9290Sstevel@tonic-gate ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp)
9300Sstevel@tonic-gate {
9310Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp);
9320Sstevel@tonic-gate }
9330Sstevel@tonic-gate
9340Sstevel@tonic-gate /*
93510616SSebastien.Roy@Sun.COM * Because the classifier is used to classify inbound packets, the destination
93610616SSebastien.Roy@Sun.COM * address is meant to be our local tunnel address (tunnel source), and the
93710616SSebastien.Roy@Sun.COM * source the remote tunnel address (tunnel destination).
93811042SErik.Nordmark@Sun.COM *
93911042SErik.Nordmark@Sun.COM * Note that conn_proto can't be used for fanout since the upper protocol
94011042SErik.Nordmark@Sun.COM * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel.
94110616SSebastien.Roy@Sun.COM */
94210616SSebastien.Roy@Sun.COM conn_t *
ipcl_iptun_classify_v4(ipaddr_t * src,ipaddr_t * dst,ip_stack_t * ipst)94310616SSebastien.Roy@Sun.COM ipcl_iptun_classify_v4(ipaddr_t *src, ipaddr_t *dst, ip_stack_t *ipst)
94410616SSebastien.Roy@Sun.COM {
94510616SSebastien.Roy@Sun.COM connf_t *connfp;
94610616SSebastien.Roy@Sun.COM conn_t *connp;
94710616SSebastien.Roy@Sun.COM
94810616SSebastien.Roy@Sun.COM /* first look for IPv4 tunnel links */
94910616SSebastien.Roy@Sun.COM connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, *src)];
95010616SSebastien.Roy@Sun.COM mutex_enter(&connfp->connf_lock);
95110616SSebastien.Roy@Sun.COM for (connp = connfp->connf_head; connp != NULL;
95210616SSebastien.Roy@Sun.COM connp = connp->conn_next) {
95310616SSebastien.Roy@Sun.COM if (IPCL_IPTUN_MATCH(connp, *dst, *src))
95410616SSebastien.Roy@Sun.COM break;
95510616SSebastien.Roy@Sun.COM }
95610616SSebastien.Roy@Sun.COM if (connp != NULL)
95710616SSebastien.Roy@Sun.COM goto done;
95810616SSebastien.Roy@Sun.COM
95910616SSebastien.Roy@Sun.COM mutex_exit(&connfp->connf_lock);
96010616SSebastien.Roy@Sun.COM
96110616SSebastien.Roy@Sun.COM /* We didn't find an IPv4 tunnel, try a 6to4 tunnel */
96210616SSebastien.Roy@Sun.COM connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst,
96310616SSebastien.Roy@Sun.COM INADDR_ANY)];
96410616SSebastien.Roy@Sun.COM mutex_enter(&connfp->connf_lock);
96510616SSebastien.Roy@Sun.COM for (connp = connfp->connf_head; connp != NULL;
96610616SSebastien.Roy@Sun.COM connp = connp->conn_next) {
96710616SSebastien.Roy@Sun.COM if (IPCL_IPTUN_MATCH(connp, *dst, INADDR_ANY))
96810616SSebastien.Roy@Sun.COM break;
96910616SSebastien.Roy@Sun.COM }
97010616SSebastien.Roy@Sun.COM done:
97110616SSebastien.Roy@Sun.COM if (connp != NULL)
97210616SSebastien.Roy@Sun.COM CONN_INC_REF(connp);
97310616SSebastien.Roy@Sun.COM mutex_exit(&connfp->connf_lock);
97410616SSebastien.Roy@Sun.COM return (connp);
97510616SSebastien.Roy@Sun.COM }
97610616SSebastien.Roy@Sun.COM
97710616SSebastien.Roy@Sun.COM conn_t *
ipcl_iptun_classify_v6(in6_addr_t * src,in6_addr_t * dst,ip_stack_t * ipst)97810616SSebastien.Roy@Sun.COM ipcl_iptun_classify_v6(in6_addr_t *src, in6_addr_t *dst, ip_stack_t *ipst)
97910616SSebastien.Roy@Sun.COM {
98010616SSebastien.Roy@Sun.COM connf_t *connfp;
98110616SSebastien.Roy@Sun.COM conn_t *connp;
98210616SSebastien.Roy@Sun.COM
98310616SSebastien.Roy@Sun.COM /* Look for an IPv6 tunnel link */
98410616SSebastien.Roy@Sun.COM connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(dst, src)];
98510616SSebastien.Roy@Sun.COM mutex_enter(&connfp->connf_lock);
98610616SSebastien.Roy@Sun.COM for (connp = connfp->connf_head; connp != NULL;
98710616SSebastien.Roy@Sun.COM connp = connp->conn_next) {
98810616SSebastien.Roy@Sun.COM if (IPCL_IPTUN_MATCH_V6(connp, dst, src)) {
98910616SSebastien.Roy@Sun.COM CONN_INC_REF(connp);
99010616SSebastien.Roy@Sun.COM break;
99110616SSebastien.Roy@Sun.COM }
99210616SSebastien.Roy@Sun.COM }
99310616SSebastien.Roy@Sun.COM mutex_exit(&connfp->connf_lock);
99410616SSebastien.Roy@Sun.COM return (connp);
99510616SSebastien.Roy@Sun.COM }
99610616SSebastien.Roy@Sun.COM
99710616SSebastien.Roy@Sun.COM /*
9980Sstevel@tonic-gate * This function is used only for inserting SCTP raw socket now.
9990Sstevel@tonic-gate * This may change later.
10000Sstevel@tonic-gate *
10010Sstevel@tonic-gate * Note that only one raw socket can be bound to a port. The param
10020Sstevel@tonic-gate * lport is in network byte order.
10030Sstevel@tonic-gate */
10040Sstevel@tonic-gate static int
ipcl_sctp_hash_insert(conn_t * connp,in_port_t lport)10050Sstevel@tonic-gate ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport)
10060Sstevel@tonic-gate {
10070Sstevel@tonic-gate connf_t *connfp;
10080Sstevel@tonic-gate conn_t *oconnp;
10093448Sdh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
10100Sstevel@tonic-gate
10113448Sdh155122 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
10120Sstevel@tonic-gate
10130Sstevel@tonic-gate /* Check for existing raw socket already bound to the port. */
10140Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
10150Sstevel@tonic-gate for (oconnp = connfp->connf_head; oconnp != NULL;
1016409Skcpoon oconnp = oconnp->conn_next) {
10170Sstevel@tonic-gate if (oconnp->conn_lport == lport &&
10180Sstevel@tonic-gate oconnp->conn_zoneid == connp->conn_zoneid &&
101911042SErik.Nordmark@Sun.COM oconnp->conn_family == connp->conn_family &&
102011042SErik.Nordmark@Sun.COM ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
102111042SErik.Nordmark@Sun.COM IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_laddr_v6) ||
102211042SErik.Nordmark@Sun.COM IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6) ||
102311042SErik.Nordmark@Sun.COM IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_laddr_v6)) ||
102411042SErik.Nordmark@Sun.COM IN6_ARE_ADDR_EQUAL(&oconnp->conn_laddr_v6,
102511042SErik.Nordmark@Sun.COM &connp->conn_laddr_v6))) {
10260Sstevel@tonic-gate break;
10270Sstevel@tonic-gate }
10280Sstevel@tonic-gate }
10290Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
10300Sstevel@tonic-gate if (oconnp != NULL)
10310Sstevel@tonic-gate return (EADDRNOTAVAIL);
10320Sstevel@tonic-gate
103311042SErik.Nordmark@Sun.COM if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) ||
103411042SErik.Nordmark@Sun.COM IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
103511042SErik.Nordmark@Sun.COM if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
103611042SErik.Nordmark@Sun.COM IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6)) {
10370Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp);
10380Sstevel@tonic-gate } else {
10390Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp);
10400Sstevel@tonic-gate }
10410Sstevel@tonic-gate } else {
10420Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp);
10430Sstevel@tonic-gate }
10440Sstevel@tonic-gate return (0);
10450Sstevel@tonic-gate }
10460Sstevel@tonic-gate
104710616SSebastien.Roy@Sun.COM static int
ipcl_iptun_hash_insert(conn_t * connp,ip_stack_t * ipst)104811042SErik.Nordmark@Sun.COM ipcl_iptun_hash_insert(conn_t *connp, ip_stack_t *ipst)
104910616SSebastien.Roy@Sun.COM {
105010616SSebastien.Roy@Sun.COM connf_t *connfp;
105110616SSebastien.Roy@Sun.COM conn_t *tconnp;
105211042SErik.Nordmark@Sun.COM ipaddr_t laddr = connp->conn_laddr_v4;
105311042SErik.Nordmark@Sun.COM ipaddr_t faddr = connp->conn_faddr_v4;
105410616SSebastien.Roy@Sun.COM
105511042SErik.Nordmark@Sun.COM connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(laddr, faddr)];
105610616SSebastien.Roy@Sun.COM mutex_enter(&connfp->connf_lock);
105710616SSebastien.Roy@Sun.COM for (tconnp = connfp->connf_head; tconnp != NULL;
105810616SSebastien.Roy@Sun.COM tconnp = tconnp->conn_next) {
105911042SErik.Nordmark@Sun.COM if (IPCL_IPTUN_MATCH(tconnp, laddr, faddr)) {
106010616SSebastien.Roy@Sun.COM /* A tunnel is already bound to these addresses. */
106110616SSebastien.Roy@Sun.COM mutex_exit(&connfp->connf_lock);
106210616SSebastien.Roy@Sun.COM return (EADDRINUSE);
106310616SSebastien.Roy@Sun.COM }
106410616SSebastien.Roy@Sun.COM }
106510616SSebastien.Roy@Sun.COM IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
106610616SSebastien.Roy@Sun.COM mutex_exit(&connfp->connf_lock);
106710616SSebastien.Roy@Sun.COM return (0);
106810616SSebastien.Roy@Sun.COM }
106910616SSebastien.Roy@Sun.COM
107010616SSebastien.Roy@Sun.COM static int
ipcl_iptun_hash_insert_v6(conn_t * connp,ip_stack_t * ipst)107111042SErik.Nordmark@Sun.COM ipcl_iptun_hash_insert_v6(conn_t *connp, ip_stack_t *ipst)
107210616SSebastien.Roy@Sun.COM {
107310616SSebastien.Roy@Sun.COM connf_t *connfp;
107410616SSebastien.Roy@Sun.COM conn_t *tconnp;
107511042SErik.Nordmark@Sun.COM in6_addr_t *laddr = &connp->conn_laddr_v6;
107611042SErik.Nordmark@Sun.COM in6_addr_t *faddr = &connp->conn_faddr_v6;
107710616SSebastien.Roy@Sun.COM
107811042SErik.Nordmark@Sun.COM connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(laddr, faddr)];
107910616SSebastien.Roy@Sun.COM mutex_enter(&connfp->connf_lock);
108010616SSebastien.Roy@Sun.COM for (tconnp = connfp->connf_head; tconnp != NULL;
108110616SSebastien.Roy@Sun.COM tconnp = tconnp->conn_next) {
108211042SErik.Nordmark@Sun.COM if (IPCL_IPTUN_MATCH_V6(tconnp, laddr, faddr)) {
108310616SSebastien.Roy@Sun.COM /* A tunnel is already bound to these addresses. */
108410616SSebastien.Roy@Sun.COM mutex_exit(&connfp->connf_lock);
108510616SSebastien.Roy@Sun.COM return (EADDRINUSE);
108610616SSebastien.Roy@Sun.COM }
108710616SSebastien.Roy@Sun.COM }
108810616SSebastien.Roy@Sun.COM IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
108910616SSebastien.Roy@Sun.COM mutex_exit(&connfp->connf_lock);
109010616SSebastien.Roy@Sun.COM return (0);
109110616SSebastien.Roy@Sun.COM }
109210616SSebastien.Roy@Sun.COM
10930Sstevel@tonic-gate /*
10941676Sjpk * Check for a MAC exemption conflict on a labeled system. Note that for
10951676Sjpk * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the
10961676Sjpk * transport layer. This check is for binding all other protocols.
10971676Sjpk *
10981676Sjpk * Returns true if there's a conflict.
10991676Sjpk */
11001676Sjpk static boolean_t
check_exempt_conflict_v4(conn_t * connp,ip_stack_t * ipst)11013448Sdh155122 check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst)
11021676Sjpk {
11031676Sjpk connf_t *connfp;
11041676Sjpk conn_t *tconn;
11051676Sjpk
110611042SErik.Nordmark@Sun.COM connfp = &ipst->ips_ipcl_proto_fanout_v4[connp->conn_proto];
11071676Sjpk mutex_enter(&connfp->connf_lock);
11081676Sjpk for (tconn = connfp->connf_head; tconn != NULL;
11091676Sjpk tconn = tconn->conn_next) {
11101676Sjpk /* We don't allow v4 fallback for v6 raw socket */
111111042SErik.Nordmark@Sun.COM if (connp->conn_family != tconn->conn_family)
11121676Sjpk continue;
11131676Sjpk /* If neither is exempt, then there's no conflict */
111410934Ssommerfeld@sun.com if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
111510934Ssommerfeld@sun.com (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
11161676Sjpk continue;
11179710SKen.Powell@Sun.COM /* We are only concerned about sockets for a different zone */
11189710SKen.Powell@Sun.COM if (connp->conn_zoneid == tconn->conn_zoneid)
11199710SKen.Powell@Sun.COM continue;
11201676Sjpk /* If both are bound to different specific addrs, ok */
112111042SErik.Nordmark@Sun.COM if (connp->conn_laddr_v4 != INADDR_ANY &&
112211042SErik.Nordmark@Sun.COM tconn->conn_laddr_v4 != INADDR_ANY &&
112311042SErik.Nordmark@Sun.COM connp->conn_laddr_v4 != tconn->conn_laddr_v4)
11241676Sjpk continue;
11251676Sjpk /* These two conflict; fail */
11261676Sjpk break;
11271676Sjpk }
11281676Sjpk mutex_exit(&connfp->connf_lock);
11291676Sjpk return (tconn != NULL);
11301676Sjpk }
11311676Sjpk
11321676Sjpk static boolean_t
check_exempt_conflict_v6(conn_t * connp,ip_stack_t * ipst)11333448Sdh155122 check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst)
11341676Sjpk {
11351676Sjpk connf_t *connfp;
11361676Sjpk conn_t *tconn;
11371676Sjpk
113811042SErik.Nordmark@Sun.COM connfp = &ipst->ips_ipcl_proto_fanout_v6[connp->conn_proto];
11391676Sjpk mutex_enter(&connfp->connf_lock);
11401676Sjpk for (tconn = connfp->connf_head; tconn != NULL;
11411676Sjpk tconn = tconn->conn_next) {
11421676Sjpk /* We don't allow v4 fallback for v6 raw socket */
114311042SErik.Nordmark@Sun.COM if (connp->conn_family != tconn->conn_family)
11441676Sjpk continue;
11451676Sjpk /* If neither is exempt, then there's no conflict */
114610934Ssommerfeld@sun.com if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
114710934Ssommerfeld@sun.com (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
11481676Sjpk continue;
11499710SKen.Powell@Sun.COM /* We are only concerned about sockets for a different zone */
11509710SKen.Powell@Sun.COM if (connp->conn_zoneid == tconn->conn_zoneid)
11519710SKen.Powell@Sun.COM continue;
11521676Sjpk /* If both are bound to different addrs, ok */
115311042SErik.Nordmark@Sun.COM if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) &&
115411042SErik.Nordmark@Sun.COM !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_laddr_v6) &&
115511042SErik.Nordmark@Sun.COM !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
115611042SErik.Nordmark@Sun.COM &tconn->conn_laddr_v6))
11571676Sjpk continue;
11581676Sjpk /* These two conflict; fail */
11591676Sjpk break;
11601676Sjpk }
11611676Sjpk mutex_exit(&connfp->connf_lock);
11621676Sjpk return (tconn != NULL);
11631676Sjpk }
11641676Sjpk
11651676Sjpk /*
11660Sstevel@tonic-gate * (v4, v6) bind hash insertion routines
116711042SErik.Nordmark@Sun.COM * The caller has already setup the conn (conn_proto, conn_laddr_v6, conn_lport)
11680Sstevel@tonic-gate */
116911042SErik.Nordmark@Sun.COM
11700Sstevel@tonic-gate int
ipcl_bind_insert(conn_t * connp)117111042SErik.Nordmark@Sun.COM ipcl_bind_insert(conn_t *connp)
117211042SErik.Nordmark@Sun.COM {
117311042SErik.Nordmark@Sun.COM if (connp->conn_ipversion == IPV6_VERSION)
117411042SErik.Nordmark@Sun.COM return (ipcl_bind_insert_v6(connp));
117511042SErik.Nordmark@Sun.COM else
117611042SErik.Nordmark@Sun.COM return (ipcl_bind_insert_v4(connp));
117711042SErik.Nordmark@Sun.COM }
117811042SErik.Nordmark@Sun.COM
117911042SErik.Nordmark@Sun.COM int
ipcl_bind_insert_v4(conn_t * connp)118011042SErik.Nordmark@Sun.COM ipcl_bind_insert_v4(conn_t *connp)
11810Sstevel@tonic-gate {
11820Sstevel@tonic-gate connf_t *connfp;
11830Sstevel@tonic-gate int ret = 0;
11843448Sdh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
118511042SErik.Nordmark@Sun.COM uint16_t lport = connp->conn_lport;
118611042SErik.Nordmark@Sun.COM uint8_t protocol = connp->conn_proto;
11870Sstevel@tonic-gate
118810616SSebastien.Roy@Sun.COM if (IPCL_IS_IPTUN(connp))
118911042SErik.Nordmark@Sun.COM return (ipcl_iptun_hash_insert(connp, ipst));
119010616SSebastien.Roy@Sun.COM
11910Sstevel@tonic-gate switch (protocol) {
11921676Sjpk default:
11933448Sdh155122 if (is_system_labeled() &&
11943448Sdh155122 check_exempt_conflict_v4(connp, ipst))
11951676Sjpk return (EADDRINUSE);
11961676Sjpk /* FALLTHROUGH */
11970Sstevel@tonic-gate case IPPROTO_UDP:
11980Sstevel@tonic-gate if (protocol == IPPROTO_UDP) {
11993448Sdh155122 connfp = &ipst->ips_ipcl_udp_fanout[
12003448Sdh155122 IPCL_UDP_HASH(lport, ipst)];
12010Sstevel@tonic-gate } else {
120211042SErik.Nordmark@Sun.COM connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
12030Sstevel@tonic-gate }
12040Sstevel@tonic-gate
120511042SErik.Nordmark@Sun.COM if (connp->conn_faddr_v4 != INADDR_ANY) {
12060Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp);
120711042SErik.Nordmark@Sun.COM } else if (connp->conn_laddr_v4 != INADDR_ANY) {
12080Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp);
12090Sstevel@tonic-gate } else {
12100Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp);
12110Sstevel@tonic-gate }
121211042SErik.Nordmark@Sun.COM if (protocol == IPPROTO_RSVP)
121311042SErik.Nordmark@Sun.COM ill_set_inputfn_all(ipst);
12140Sstevel@tonic-gate break;
12150Sstevel@tonic-gate
12160Sstevel@tonic-gate case IPPROTO_TCP:
12170Sstevel@tonic-gate /* Insert it in the Bind Hash */
12181676Sjpk ASSERT(connp->conn_zoneid != ALL_ZONES);
12193448Sdh155122 connfp = &ipst->ips_ipcl_bind_fanout[
12203448Sdh155122 IPCL_BIND_HASH(lport, ipst)];
122111042SErik.Nordmark@Sun.COM if (connp->conn_laddr_v4 != INADDR_ANY) {
12220Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp);
12230Sstevel@tonic-gate } else {
12240Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp);
12250Sstevel@tonic-gate }
12260Sstevel@tonic-gate if (cl_inet_listen != NULL) {
122711042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ipversion == IPV4_VERSION);
12280Sstevel@tonic-gate connp->conn_flags |= IPCL_CL_LISTENER;
12298392SHuafeng.Lv@Sun.COM (*cl_inet_listen)(
12308392SHuafeng.Lv@Sun.COM connp->conn_netstack->netstack_stackid,
12318392SHuafeng.Lv@Sun.COM IPPROTO_TCP, AF_INET,
123211042SErik.Nordmark@Sun.COM (uint8_t *)&connp->conn_bound_addr_v4, lport, NULL);
12330Sstevel@tonic-gate }
12340Sstevel@tonic-gate break;
12350Sstevel@tonic-gate
12360Sstevel@tonic-gate case IPPROTO_SCTP:
12370Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport);
12380Sstevel@tonic-gate break;
12390Sstevel@tonic-gate }
12400Sstevel@tonic-gate
12410Sstevel@tonic-gate return (ret);
12420Sstevel@tonic-gate }
12430Sstevel@tonic-gate
12440Sstevel@tonic-gate int
ipcl_bind_insert_v6(conn_t * connp)124511042SErik.Nordmark@Sun.COM ipcl_bind_insert_v6(conn_t *connp)
12460Sstevel@tonic-gate {
124710616SSebastien.Roy@Sun.COM connf_t *connfp;
124810616SSebastien.Roy@Sun.COM int ret = 0;
12493448Sdh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
125011042SErik.Nordmark@Sun.COM uint16_t lport = connp->conn_lport;
125111042SErik.Nordmark@Sun.COM uint8_t protocol = connp->conn_proto;
12520Sstevel@tonic-gate
125310616SSebastien.Roy@Sun.COM if (IPCL_IS_IPTUN(connp)) {
125411042SErik.Nordmark@Sun.COM return (ipcl_iptun_hash_insert_v6(connp, ipst));
125510616SSebastien.Roy@Sun.COM }
125610616SSebastien.Roy@Sun.COM
12570Sstevel@tonic-gate switch (protocol) {
12581676Sjpk default:
12593448Sdh155122 if (is_system_labeled() &&
12603448Sdh155122 check_exempt_conflict_v6(connp, ipst))
12611676Sjpk return (EADDRINUSE);
12621676Sjpk /* FALLTHROUGH */
12630Sstevel@tonic-gate case IPPROTO_UDP:
12640Sstevel@tonic-gate if (protocol == IPPROTO_UDP) {
12653448Sdh155122 connfp = &ipst->ips_ipcl_udp_fanout[
12663448Sdh155122 IPCL_UDP_HASH(lport, ipst)];
12670Sstevel@tonic-gate } else {
12683448Sdh155122 connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
12690Sstevel@tonic-gate }
12700Sstevel@tonic-gate
127111042SErik.Nordmark@Sun.COM if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
12720Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp);
127311042SErik.Nordmark@Sun.COM } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
12740Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp);
12750Sstevel@tonic-gate } else {
12760Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp);
12770Sstevel@tonic-gate }
12780Sstevel@tonic-gate break;
12790Sstevel@tonic-gate
12800Sstevel@tonic-gate case IPPROTO_TCP:
12810Sstevel@tonic-gate /* Insert it in the Bind Hash */
12821676Sjpk ASSERT(connp->conn_zoneid != ALL_ZONES);
12833448Sdh155122 connfp = &ipst->ips_ipcl_bind_fanout[
12843448Sdh155122 IPCL_BIND_HASH(lport, ipst)];
128511042SErik.Nordmark@Sun.COM if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
12860Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp);
12870Sstevel@tonic-gate } else {
12880Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp);
12890Sstevel@tonic-gate }
12900Sstevel@tonic-gate if (cl_inet_listen != NULL) {
12910Sstevel@tonic-gate sa_family_t addr_family;
12920Sstevel@tonic-gate uint8_t *laddrp;
12930Sstevel@tonic-gate
129411042SErik.Nordmark@Sun.COM if (connp->conn_ipversion == IPV6_VERSION) {
12950Sstevel@tonic-gate addr_family = AF_INET6;
12960Sstevel@tonic-gate laddrp =
129711042SErik.Nordmark@Sun.COM (uint8_t *)&connp->conn_bound_addr_v6;
12980Sstevel@tonic-gate } else {
12990Sstevel@tonic-gate addr_family = AF_INET;
130011042SErik.Nordmark@Sun.COM laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
13010Sstevel@tonic-gate }
13020Sstevel@tonic-gate connp->conn_flags |= IPCL_CL_LISTENER;
13038392SHuafeng.Lv@Sun.COM (*cl_inet_listen)(
13048392SHuafeng.Lv@Sun.COM connp->conn_netstack->netstack_stackid,
13058392SHuafeng.Lv@Sun.COM IPPROTO_TCP, addr_family, laddrp, lport, NULL);
13060Sstevel@tonic-gate }
13070Sstevel@tonic-gate break;
13080Sstevel@tonic-gate
13090Sstevel@tonic-gate case IPPROTO_SCTP:
13100Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport);
13110Sstevel@tonic-gate break;
13120Sstevel@tonic-gate }
13130Sstevel@tonic-gate
13140Sstevel@tonic-gate return (ret);
13150Sstevel@tonic-gate }
13160Sstevel@tonic-gate
13170Sstevel@tonic-gate /*
13180Sstevel@tonic-gate * ipcl_conn_hash insertion routines.
131911042SErik.Nordmark@Sun.COM * The caller has already set conn_proto and the addresses/ports in the conn_t.
13200Sstevel@tonic-gate */
132111042SErik.Nordmark@Sun.COM
13220Sstevel@tonic-gate int
ipcl_conn_insert(conn_t * connp)132311042SErik.Nordmark@Sun.COM ipcl_conn_insert(conn_t *connp)
132411042SErik.Nordmark@Sun.COM {
132511042SErik.Nordmark@Sun.COM if (connp->conn_ipversion == IPV6_VERSION)
132611042SErik.Nordmark@Sun.COM return (ipcl_conn_insert_v6(connp));
132711042SErik.Nordmark@Sun.COM else
132811042SErik.Nordmark@Sun.COM return (ipcl_conn_insert_v4(connp));
132911042SErik.Nordmark@Sun.COM }
133011042SErik.Nordmark@Sun.COM
133111042SErik.Nordmark@Sun.COM int
ipcl_conn_insert_v4(conn_t * connp)133211042SErik.Nordmark@Sun.COM ipcl_conn_insert_v4(conn_t *connp)
13330Sstevel@tonic-gate {
13340Sstevel@tonic-gate connf_t *connfp;
13350Sstevel@tonic-gate conn_t *tconnp;
13360Sstevel@tonic-gate int ret = 0;
13373448Sdh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
133811042SErik.Nordmark@Sun.COM uint16_t lport = connp->conn_lport;
133911042SErik.Nordmark@Sun.COM uint8_t protocol = connp->conn_proto;
13400Sstevel@tonic-gate
134110616SSebastien.Roy@Sun.COM if (IPCL_IS_IPTUN(connp))
134211042SErik.Nordmark@Sun.COM return (ipcl_iptun_hash_insert(connp, ipst));
134310616SSebastien.Roy@Sun.COM
13440Sstevel@tonic-gate switch (protocol) {
13450Sstevel@tonic-gate case IPPROTO_TCP:
13468432SJonathan.Anderson@Sun.COM /*
134711042SErik.Nordmark@Sun.COM * For TCP, we check whether the connection tuple already
13488432SJonathan.Anderson@Sun.COM * exists before allowing the connection to proceed. We
13498432SJonathan.Anderson@Sun.COM * also allow indexing on the zoneid. This is to allow
13508432SJonathan.Anderson@Sun.COM * multiple shared stack zones to have the same tcp
13518432SJonathan.Anderson@Sun.COM * connection tuple. In practice this only happens for
13528432SJonathan.Anderson@Sun.COM * INADDR_LOOPBACK as it's the only local address which
13538432SJonathan.Anderson@Sun.COM * doesn't have to be unique.
13548432SJonathan.Anderson@Sun.COM */
13553448Sdh155122 connfp = &ipst->ips_ipcl_conn_fanout[
135611042SErik.Nordmark@Sun.COM IPCL_CONN_HASH(connp->conn_faddr_v4,
13573448Sdh155122 connp->conn_ports, ipst)];
13580Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
13590Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL;
13600Sstevel@tonic-gate tconnp = tconnp->conn_next) {
136111042SErik.Nordmark@Sun.COM if (IPCL_CONN_MATCH(tconnp, connp->conn_proto,
136211042SErik.Nordmark@Sun.COM connp->conn_faddr_v4, connp->conn_laddr_v4,
136311042SErik.Nordmark@Sun.COM connp->conn_ports) &&
136411042SErik.Nordmark@Sun.COM IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
13650Sstevel@tonic-gate /* Already have a conn. bail out */
13660Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
13670Sstevel@tonic-gate return (EADDRINUSE);
13680Sstevel@tonic-gate }
13690Sstevel@tonic-gate }
13700Sstevel@tonic-gate if (connp->conn_fanout != NULL) {
13710Sstevel@tonic-gate /*
13720Sstevel@tonic-gate * Probably a XTI/TLI application trying to do a
13730Sstevel@tonic-gate * rebind. Let it happen.
13740Sstevel@tonic-gate */
13750Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
13760Sstevel@tonic-gate IPCL_HASH_REMOVE(connp);
13770Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
13780Sstevel@tonic-gate }
13793104Sjprakash
13803104Sjprakash ASSERT(connp->conn_recv != NULL);
138111042SErik.Nordmark@Sun.COM ASSERT(connp->conn_recvicmp != NULL);
13823104Sjprakash
13830Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
13840Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
13850Sstevel@tonic-gate break;
13860Sstevel@tonic-gate
13870Sstevel@tonic-gate case IPPROTO_SCTP:
1388409Skcpoon /*
1389409Skcpoon * The raw socket may have already been bound, remove it
1390409Skcpoon * from the hash first.
1391409Skcpoon */
1392409Skcpoon IPCL_HASH_REMOVE(connp);
13930Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport);
13940Sstevel@tonic-gate break;
13950Sstevel@tonic-gate
13961676Sjpk default:
13971676Sjpk /*
13981676Sjpk * Check for conflicts among MAC exempt bindings. For
13991676Sjpk * transports with port numbers, this is done by the upper
14001676Sjpk * level per-transport binding logic. For all others, it's
14011676Sjpk * done here.
14021676Sjpk */
14033448Sdh155122 if (is_system_labeled() &&
14043448Sdh155122 check_exempt_conflict_v4(connp, ipst))
14051676Sjpk return (EADDRINUSE);
14061676Sjpk /* FALLTHROUGH */
14071676Sjpk
14080Sstevel@tonic-gate case IPPROTO_UDP:
14090Sstevel@tonic-gate if (protocol == IPPROTO_UDP) {
14103448Sdh155122 connfp = &ipst->ips_ipcl_udp_fanout[
141111042SErik.Nordmark@Sun.COM IPCL_UDP_HASH(lport, ipst)];
14120Sstevel@tonic-gate } else {
141311042SErik.Nordmark@Sun.COM connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
14140Sstevel@tonic-gate }
14150Sstevel@tonic-gate
141611042SErik.Nordmark@Sun.COM if (connp->conn_faddr_v4 != INADDR_ANY) {
14170Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp);
141811042SErik.Nordmark@Sun.COM } else if (connp->conn_laddr_v4 != INADDR_ANY) {
14190Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp);
14200Sstevel@tonic-gate } else {
14210Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp);
14220Sstevel@tonic-gate }
14230Sstevel@tonic-gate break;
14240Sstevel@tonic-gate }
14250Sstevel@tonic-gate
14260Sstevel@tonic-gate return (ret);
14270Sstevel@tonic-gate }
14280Sstevel@tonic-gate
14290Sstevel@tonic-gate int
ipcl_conn_insert_v6(conn_t * connp)143011042SErik.Nordmark@Sun.COM ipcl_conn_insert_v6(conn_t *connp)
14310Sstevel@tonic-gate {
14320Sstevel@tonic-gate connf_t *connfp;
14330Sstevel@tonic-gate conn_t *tconnp;
14340Sstevel@tonic-gate int ret = 0;
14353448Sdh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
143611042SErik.Nordmark@Sun.COM uint16_t lport = connp->conn_lport;
143711042SErik.Nordmark@Sun.COM uint8_t protocol = connp->conn_proto;
143811042SErik.Nordmark@Sun.COM uint_t ifindex = connp->conn_bound_if;
14390Sstevel@tonic-gate
144010616SSebastien.Roy@Sun.COM if (IPCL_IS_IPTUN(connp))
144111042SErik.Nordmark@Sun.COM return (ipcl_iptun_hash_insert_v6(connp, ipst));
144210616SSebastien.Roy@Sun.COM
14430Sstevel@tonic-gate switch (protocol) {
14440Sstevel@tonic-gate case IPPROTO_TCP:
14458432SJonathan.Anderson@Sun.COM
14468432SJonathan.Anderson@Sun.COM /*
14478432SJonathan.Anderson@Sun.COM * For tcp, we check whether the connection tuple already
14488432SJonathan.Anderson@Sun.COM * exists before allowing the connection to proceed. We
14498432SJonathan.Anderson@Sun.COM * also allow indexing on the zoneid. This is to allow
14508432SJonathan.Anderson@Sun.COM * multiple shared stack zones to have the same tcp
14518432SJonathan.Anderson@Sun.COM * connection tuple. In practice this only happens for
14528432SJonathan.Anderson@Sun.COM * ipv6_loopback as it's the only local address which
14538432SJonathan.Anderson@Sun.COM * doesn't have to be unique.
14548432SJonathan.Anderson@Sun.COM */
14553448Sdh155122 connfp = &ipst->ips_ipcl_conn_fanout[
145611042SErik.Nordmark@Sun.COM IPCL_CONN_HASH_V6(connp->conn_faddr_v6, connp->conn_ports,
14573448Sdh155122 ipst)];
14580Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
14590Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL;
14600Sstevel@tonic-gate tconnp = tconnp->conn_next) {
146111042SErik.Nordmark@Sun.COM /* NOTE: need to match zoneid. Bug in onnv-gate */
146211042SErik.Nordmark@Sun.COM if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_proto,
146311042SErik.Nordmark@Sun.COM connp->conn_faddr_v6, connp->conn_laddr_v6,
14640Sstevel@tonic-gate connp->conn_ports) &&
146511042SErik.Nordmark@Sun.COM (tconnp->conn_bound_if == 0 ||
146611042SErik.Nordmark@Sun.COM tconnp->conn_bound_if == ifindex) &&
146711042SErik.Nordmark@Sun.COM IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
14680Sstevel@tonic-gate /* Already have a conn. bail out */
14690Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
14700Sstevel@tonic-gate return (EADDRINUSE);
14710Sstevel@tonic-gate }
14720Sstevel@tonic-gate }
14730Sstevel@tonic-gate if (connp->conn_fanout != NULL) {
14740Sstevel@tonic-gate /*
14750Sstevel@tonic-gate * Probably a XTI/TLI application trying to do a
14760Sstevel@tonic-gate * rebind. Let it happen.
14770Sstevel@tonic-gate */
14780Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
14790Sstevel@tonic-gate IPCL_HASH_REMOVE(connp);
14800Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
14810Sstevel@tonic-gate }
14820Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
14830Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
14840Sstevel@tonic-gate break;
14850Sstevel@tonic-gate
14860Sstevel@tonic-gate case IPPROTO_SCTP:
1487409Skcpoon IPCL_HASH_REMOVE(connp);
14880Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport);
14890Sstevel@tonic-gate break;
14900Sstevel@tonic-gate
14911676Sjpk default:
14923448Sdh155122 if (is_system_labeled() &&
14933448Sdh155122 check_exempt_conflict_v6(connp, ipst))
14941676Sjpk return (EADDRINUSE);
14951676Sjpk /* FALLTHROUGH */
14960Sstevel@tonic-gate case IPPROTO_UDP:
14970Sstevel@tonic-gate if (protocol == IPPROTO_UDP) {
14983448Sdh155122 connfp = &ipst->ips_ipcl_udp_fanout[
149911042SErik.Nordmark@Sun.COM IPCL_UDP_HASH(lport, ipst)];
15000Sstevel@tonic-gate } else {
15013448Sdh155122 connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
15020Sstevel@tonic-gate }
15030Sstevel@tonic-gate
150411042SErik.Nordmark@Sun.COM if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
15050Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp);
150611042SErik.Nordmark@Sun.COM } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
15070Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp);
15080Sstevel@tonic-gate } else {
15090Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp);
15100Sstevel@tonic-gate }
15110Sstevel@tonic-gate break;
15120Sstevel@tonic-gate }
15130Sstevel@tonic-gate
15140Sstevel@tonic-gate return (ret);
15150Sstevel@tonic-gate }
15160Sstevel@tonic-gate
/*
 * IPv4 packet classifying function.  Looks up the fanout tables to
 * find the conn that the packet belongs to.  Returns the conn with
 * a reference held, or NULL if no conn matches.
 *
 * If zoneid is ALL_ZONES, then the search rules described in the "Connection
 * Lookup" comment block are applied.  Labels are also checked as described
 * above.  If the packet is from the inside (looped back), and is from the same
 * zone, then label checks are omitted.
 */
15270Sstevel@tonic-gate conn_t *
ipcl_classify_v4(mblk_t * mp,uint8_t protocol,uint_t hdr_len,ip_recv_attr_t * ira,ip_stack_t * ipst)152811042SErik.Nordmark@Sun.COM ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
152911042SErik.Nordmark@Sun.COM ip_recv_attr_t *ira, ip_stack_t *ipst)
15300Sstevel@tonic-gate {
15310Sstevel@tonic-gate ipha_t *ipha;
15320Sstevel@tonic-gate connf_t *connfp, *bind_connfp;
15330Sstevel@tonic-gate uint16_t lport;
15340Sstevel@tonic-gate uint16_t fport;
15350Sstevel@tonic-gate uint32_t ports;
15360Sstevel@tonic-gate conn_t *connp;
15370Sstevel@tonic-gate uint16_t *up;
153811042SErik.Nordmark@Sun.COM zoneid_t zoneid = ira->ira_zoneid;
15390Sstevel@tonic-gate
15400Sstevel@tonic-gate ipha = (ipha_t *)mp->b_rptr;
15410Sstevel@tonic-gate up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET);
15420Sstevel@tonic-gate
15430Sstevel@tonic-gate switch (protocol) {
15440Sstevel@tonic-gate case IPPROTO_TCP:
15450Sstevel@tonic-gate ports = *(uint32_t *)up;
15460Sstevel@tonic-gate connfp =
15473448Sdh155122 &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src,
15483448Sdh155122 ports, ipst)];
15490Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
15500Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL;
15510Sstevel@tonic-gate connp = connp->conn_next) {
155211042SErik.Nordmark@Sun.COM if (IPCL_CONN_MATCH(connp, protocol,
155311042SErik.Nordmark@Sun.COM ipha->ipha_src, ipha->ipha_dst, ports) &&
155411042SErik.Nordmark@Sun.COM (connp->conn_zoneid == zoneid ||
155511042SErik.Nordmark@Sun.COM connp->conn_allzones ||
155611042SErik.Nordmark@Sun.COM ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
155711042SErik.Nordmark@Sun.COM (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
155811042SErik.Nordmark@Sun.COM (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
15590Sstevel@tonic-gate break;
15600Sstevel@tonic-gate }
15610Sstevel@tonic-gate
15620Sstevel@tonic-gate if (connp != NULL) {
15631676Sjpk /*
15641676Sjpk * We have a fully-bound TCP connection.
15651676Sjpk *
15661676Sjpk * For labeled systems, there's no need to check the
15671676Sjpk * label here. It's known to be good as we checked
15681676Sjpk * before allowing the connection to become bound.
15691676Sjpk */
15700Sstevel@tonic-gate CONN_INC_REF(connp);
15710Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
15720Sstevel@tonic-gate return (connp);
15730Sstevel@tonic-gate }
15740Sstevel@tonic-gate
15750Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
15760Sstevel@tonic-gate lport = up[1];
15773448Sdh155122 bind_connfp =
15783448Sdh155122 &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
15790Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock);
15800Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL;
15810Sstevel@tonic-gate connp = connp->conn_next) {
15821676Sjpk if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
158311042SErik.Nordmark@Sun.COM lport) &&
158411042SErik.Nordmark@Sun.COM (connp->conn_zoneid == zoneid ||
158511042SErik.Nordmark@Sun.COM connp->conn_allzones ||
158611042SErik.Nordmark@Sun.COM ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
158711042SErik.Nordmark@Sun.COM (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
158811042SErik.Nordmark@Sun.COM (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
15890Sstevel@tonic-gate break;
15900Sstevel@tonic-gate }
15910Sstevel@tonic-gate
15921676Sjpk /*
15931676Sjpk * If the matching connection is SLP on a private address, then
15941676Sjpk * the label on the packet must match the local zone's label.
15951676Sjpk * Otherwise, it must be in the label range defined by tnrh.
159611042SErik.Nordmark@Sun.COM * This is ensured by tsol_receive_local.
159711042SErik.Nordmark@Sun.COM *
159811042SErik.Nordmark@Sun.COM * Note that we don't check tsol_receive_local for
159911042SErik.Nordmark@Sun.COM * the connected case.
16001676Sjpk */
160111042SErik.Nordmark@Sun.COM if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
16021676Sjpk !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
160311042SErik.Nordmark@Sun.COM ira, connp)) {
160411042SErik.Nordmark@Sun.COM DTRACE_PROBE3(tx__ip__log__info__classify__tcp,
160511042SErik.Nordmark@Sun.COM char *, "connp(1) could not receive mp(2)",
160611042SErik.Nordmark@Sun.COM conn_t *, connp, mblk_t *, mp);
16071676Sjpk connp = NULL;
16081676Sjpk }
16091676Sjpk
16100Sstevel@tonic-gate if (connp != NULL) {
16111676Sjpk /* Have a listener at least */
16120Sstevel@tonic-gate CONN_INC_REF(connp);
16130Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock);
16140Sstevel@tonic-gate return (connp);
16150Sstevel@tonic-gate }
16160Sstevel@tonic-gate
16170Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock);
16180Sstevel@tonic-gate break;
16190Sstevel@tonic-gate
16200Sstevel@tonic-gate case IPPROTO_UDP:
16210Sstevel@tonic-gate lport = up[1];
16220Sstevel@tonic-gate fport = up[0];
16233448Sdh155122 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
16240Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
16250Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL;
16260Sstevel@tonic-gate connp = connp->conn_next) {
16270Sstevel@tonic-gate if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
16280Sstevel@tonic-gate fport, ipha->ipha_src) &&
162911042SErik.Nordmark@Sun.COM (connp->conn_zoneid == zoneid ||
163011042SErik.Nordmark@Sun.COM connp->conn_allzones ||
163111042SErik.Nordmark@Sun.COM ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
163211042SErik.Nordmark@Sun.COM (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE))))
16330Sstevel@tonic-gate break;
16340Sstevel@tonic-gate }
16350Sstevel@tonic-gate
163611042SErik.Nordmark@Sun.COM if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
16371676Sjpk !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
163811042SErik.Nordmark@Sun.COM ira, connp)) {
16391676Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__udp,
16401676Sjpk char *, "connp(1) could not receive mp(2)",
16411676Sjpk conn_t *, connp, mblk_t *, mp);
16421676Sjpk connp = NULL;
16431676Sjpk }
16441676Sjpk
16450Sstevel@tonic-gate if (connp != NULL) {
16460Sstevel@tonic-gate CONN_INC_REF(connp);
16470Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
16480Sstevel@tonic-gate return (connp);
16490Sstevel@tonic-gate }
16500Sstevel@tonic-gate
16510Sstevel@tonic-gate /*
16520Sstevel@tonic-gate * We shouldn't come here for multicast/broadcast packets
16530Sstevel@tonic-gate */
16540Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
165511042SErik.Nordmark@Sun.COM
16560Sstevel@tonic-gate break;
165710616SSebastien.Roy@Sun.COM
165810616SSebastien.Roy@Sun.COM case IPPROTO_ENCAP:
165910616SSebastien.Roy@Sun.COM case IPPROTO_IPV6:
166010616SSebastien.Roy@Sun.COM return (ipcl_iptun_classify_v4(&ipha->ipha_src,
166110616SSebastien.Roy@Sun.COM &ipha->ipha_dst, ipst));
16620Sstevel@tonic-gate }
16630Sstevel@tonic-gate
16640Sstevel@tonic-gate return (NULL);
16650Sstevel@tonic-gate }
16660Sstevel@tonic-gate
16670Sstevel@tonic-gate conn_t *
ipcl_classify_v6(mblk_t * mp,uint8_t protocol,uint_t hdr_len,ip_recv_attr_t * ira,ip_stack_t * ipst)166811042SErik.Nordmark@Sun.COM ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
166911042SErik.Nordmark@Sun.COM ip_recv_attr_t *ira, ip_stack_t *ipst)
16700Sstevel@tonic-gate {
16710Sstevel@tonic-gate ip6_t *ip6h;
16720Sstevel@tonic-gate connf_t *connfp, *bind_connfp;
16730Sstevel@tonic-gate uint16_t lport;
16740Sstevel@tonic-gate uint16_t fport;
167511042SErik.Nordmark@Sun.COM tcpha_t *tcpha;
16760Sstevel@tonic-gate uint32_t ports;
16770Sstevel@tonic-gate conn_t *connp;
16780Sstevel@tonic-gate uint16_t *up;
167911042SErik.Nordmark@Sun.COM zoneid_t zoneid = ira->ira_zoneid;
16800Sstevel@tonic-gate
16810Sstevel@tonic-gate ip6h = (ip6_t *)mp->b_rptr;
16820Sstevel@tonic-gate
16830Sstevel@tonic-gate switch (protocol) {
16840Sstevel@tonic-gate case IPPROTO_TCP:
168511042SErik.Nordmark@Sun.COM tcpha = (tcpha_t *)&mp->b_rptr[hdr_len];
168611042SErik.Nordmark@Sun.COM up = &tcpha->tha_lport;
16870Sstevel@tonic-gate ports = *(uint32_t *)up;
16880Sstevel@tonic-gate
16890Sstevel@tonic-gate connfp =
16903448Sdh155122 &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src,
16913448Sdh155122 ports, ipst)];
16920Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
16930Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL;
16940Sstevel@tonic-gate connp = connp->conn_next) {
169511042SErik.Nordmark@Sun.COM if (IPCL_CONN_MATCH_V6(connp, protocol,
169611042SErik.Nordmark@Sun.COM ip6h->ip6_src, ip6h->ip6_dst, ports) &&
169711042SErik.Nordmark@Sun.COM (connp->conn_zoneid == zoneid ||
169811042SErik.Nordmark@Sun.COM connp->conn_allzones ||
169911042SErik.Nordmark@Sun.COM ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
170011042SErik.Nordmark@Sun.COM (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
170111042SErik.Nordmark@Sun.COM (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
17020Sstevel@tonic-gate break;
17030Sstevel@tonic-gate }
17040Sstevel@tonic-gate
17050Sstevel@tonic-gate if (connp != NULL) {
17061676Sjpk /*
17071676Sjpk * We have a fully-bound TCP connection.
17081676Sjpk *
17091676Sjpk * For labeled systems, there's no need to check the
17101676Sjpk * label here. It's known to be good as we checked
17111676Sjpk * before allowing the connection to become bound.
17121676Sjpk */
17130Sstevel@tonic-gate CONN_INC_REF(connp);
17140Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
17150Sstevel@tonic-gate return (connp);
17160Sstevel@tonic-gate }
17170Sstevel@tonic-gate
17180Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
17190Sstevel@tonic-gate
17200Sstevel@tonic-gate lport = up[1];
17213448Sdh155122 bind_connfp =
17223448Sdh155122 &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
17230Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock);
17240Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL;
17250Sstevel@tonic-gate connp = connp->conn_next) {
17260Sstevel@tonic-gate if (IPCL_BIND_MATCH_V6(connp, protocol,
17270Sstevel@tonic-gate ip6h->ip6_dst, lport) &&
172811042SErik.Nordmark@Sun.COM (connp->conn_zoneid == zoneid ||
172911042SErik.Nordmark@Sun.COM connp->conn_allzones ||
173011042SErik.Nordmark@Sun.COM ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
173111042SErik.Nordmark@Sun.COM (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
173211042SErik.Nordmark@Sun.COM (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
17330Sstevel@tonic-gate break;
17340Sstevel@tonic-gate }
17350Sstevel@tonic-gate
173611042SErik.Nordmark@Sun.COM if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
17371676Sjpk !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
173811042SErik.Nordmark@Sun.COM ira, connp)) {
17391676Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__tcp6,
17401676Sjpk char *, "connp(1) could not receive mp(2)",
17411676Sjpk conn_t *, connp, mblk_t *, mp);
17421676Sjpk connp = NULL;
17431676Sjpk }
17441676Sjpk
17450Sstevel@tonic-gate if (connp != NULL) {
17460Sstevel@tonic-gate /* Have a listner at least */
17470Sstevel@tonic-gate CONN_INC_REF(connp);
17480Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock);
17490Sstevel@tonic-gate return (connp);
17500Sstevel@tonic-gate }
17510Sstevel@tonic-gate
17520Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock);
17530Sstevel@tonic-gate break;
17540Sstevel@tonic-gate
17550Sstevel@tonic-gate case IPPROTO_UDP:
17560Sstevel@tonic-gate up = (uint16_t *)&mp->b_rptr[hdr_len];
17570Sstevel@tonic-gate lport = up[1];
17580Sstevel@tonic-gate fport = up[0];
17593448Sdh155122 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
17600Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
17610Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL;
17620Sstevel@tonic-gate connp = connp->conn_next) {
17630Sstevel@tonic-gate if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst,
17640Sstevel@tonic-gate fport, ip6h->ip6_src) &&
176511042SErik.Nordmark@Sun.COM (connp->conn_zoneid == zoneid ||
176611042SErik.Nordmark@Sun.COM connp->conn_allzones ||
176711042SErik.Nordmark@Sun.COM ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
176811042SErik.Nordmark@Sun.COM (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
176911042SErik.Nordmark@Sun.COM (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
17700Sstevel@tonic-gate break;
17710Sstevel@tonic-gate }
17720Sstevel@tonic-gate
177311042SErik.Nordmark@Sun.COM if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
17741676Sjpk !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
177511042SErik.Nordmark@Sun.COM ira, connp)) {
17761676Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__udp6,
17771676Sjpk char *, "connp(1) could not receive mp(2)",
17781676Sjpk conn_t *, connp, mblk_t *, mp);
17791676Sjpk connp = NULL;
17801676Sjpk }
17811676Sjpk
17820Sstevel@tonic-gate if (connp != NULL) {
17830Sstevel@tonic-gate CONN_INC_REF(connp);
17840Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
17850Sstevel@tonic-gate return (connp);
17860Sstevel@tonic-gate }
17870Sstevel@tonic-gate
17880Sstevel@tonic-gate /*
17890Sstevel@tonic-gate * We shouldn't come here for multicast/broadcast packets
17900Sstevel@tonic-gate */
17910Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
17920Sstevel@tonic-gate break;
179310616SSebastien.Roy@Sun.COM case IPPROTO_ENCAP:
179410616SSebastien.Roy@Sun.COM case IPPROTO_IPV6:
179510616SSebastien.Roy@Sun.COM return (ipcl_iptun_classify_v6(&ip6h->ip6_src,
179610616SSebastien.Roy@Sun.COM &ip6h->ip6_dst, ipst));
17970Sstevel@tonic-gate }
17980Sstevel@tonic-gate
17990Sstevel@tonic-gate return (NULL);
18000Sstevel@tonic-gate }
18010Sstevel@tonic-gate
18020Sstevel@tonic-gate /*
18030Sstevel@tonic-gate * wrapper around ipcl_classify_(v4,v6) routines.
18040Sstevel@tonic-gate */
18050Sstevel@tonic-gate conn_t *
ipcl_classify(mblk_t * mp,ip_recv_attr_t * ira,ip_stack_t * ipst)180611042SErik.Nordmark@Sun.COM ipcl_classify(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
18070Sstevel@tonic-gate {
180811042SErik.Nordmark@Sun.COM if (ira->ira_flags & IRAF_IS_IPV4) {
180911042SErik.Nordmark@Sun.COM return (ipcl_classify_v4(mp, ira->ira_protocol,
181011042SErik.Nordmark@Sun.COM ira->ira_ip_hdr_length, ira, ipst));
181111042SErik.Nordmark@Sun.COM } else {
181211042SErik.Nordmark@Sun.COM return (ipcl_classify_v6(mp, ira->ira_protocol,
181311042SErik.Nordmark@Sun.COM ira->ira_ip_hdr_length, ira, ipst));
18140Sstevel@tonic-gate }
18150Sstevel@tonic-gate }
18160Sstevel@tonic-gate
181711042SErik.Nordmark@Sun.COM /*
181811042SErik.Nordmark@Sun.COM * Only used to classify SCTP RAW sockets
181911042SErik.Nordmark@Sun.COM */
18200Sstevel@tonic-gate conn_t *
ipcl_classify_raw(mblk_t * mp,uint8_t protocol,uint32_t ports,ipha_t * ipha,ip6_t * ip6h,ip_recv_attr_t * ira,ip_stack_t * ipst)182111042SErik.Nordmark@Sun.COM ipcl_classify_raw(mblk_t *mp, uint8_t protocol, uint32_t ports,
182211042SErik.Nordmark@Sun.COM ipha_t *ipha, ip6_t *ip6h, ip_recv_attr_t *ira, ip_stack_t *ipst)
18230Sstevel@tonic-gate {
18241676Sjpk connf_t *connfp;
18250Sstevel@tonic-gate conn_t *connp;
18260Sstevel@tonic-gate in_port_t lport;
182711042SErik.Nordmark@Sun.COM int ipversion;
18281676Sjpk const void *dst;
182911042SErik.Nordmark@Sun.COM zoneid_t zoneid = ira->ira_zoneid;
18300Sstevel@tonic-gate
18310Sstevel@tonic-gate lport = ((uint16_t *)&ports)[1];
183211042SErik.Nordmark@Sun.COM if (ira->ira_flags & IRAF_IS_IPV4) {
183311042SErik.Nordmark@Sun.COM dst = (const void *)&ipha->ipha_dst;
183411042SErik.Nordmark@Sun.COM ipversion = IPV4_VERSION;
183511042SErik.Nordmark@Sun.COM } else {
183611042SErik.Nordmark@Sun.COM dst = (const void *)&ip6h->ip6_dst;
183711042SErik.Nordmark@Sun.COM ipversion = IPV6_VERSION;
18381676Sjpk }
18391676Sjpk
18403448Sdh155122 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
18410Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
18420Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL;
18430Sstevel@tonic-gate connp = connp->conn_next) {
18440Sstevel@tonic-gate /* We don't allow v4 fallback for v6 raw socket. */
184511042SErik.Nordmark@Sun.COM if (ipversion != connp->conn_ipversion)
18460Sstevel@tonic-gate continue;
184711042SErik.Nordmark@Sun.COM if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
184811042SErik.Nordmark@Sun.COM !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
184911042SErik.Nordmark@Sun.COM if (ipversion == IPV4_VERSION) {
18501676Sjpk if (!IPCL_CONN_MATCH(connp, protocol,
185111042SErik.Nordmark@Sun.COM ipha->ipha_src, ipha->ipha_dst, ports))
18521676Sjpk continue;
18530Sstevel@tonic-gate } else {
18541676Sjpk if (!IPCL_CONN_MATCH_V6(connp, protocol,
185511042SErik.Nordmark@Sun.COM ip6h->ip6_src, ip6h->ip6_dst, ports))
18561676Sjpk continue;
18570Sstevel@tonic-gate }
18580Sstevel@tonic-gate } else {
185911042SErik.Nordmark@Sun.COM if (ipversion == IPV4_VERSION) {
18601676Sjpk if (!IPCL_BIND_MATCH(connp, protocol,
186111042SErik.Nordmark@Sun.COM ipha->ipha_dst, lport))
18621676Sjpk continue;
18630Sstevel@tonic-gate } else {
18641676Sjpk if (!IPCL_BIND_MATCH_V6(connp, protocol,
186511042SErik.Nordmark@Sun.COM ip6h->ip6_dst, lport))
18661676Sjpk continue;
18670Sstevel@tonic-gate }
18680Sstevel@tonic-gate }
18691676Sjpk
187011042SErik.Nordmark@Sun.COM if (connp->conn_zoneid == zoneid ||
187111042SErik.Nordmark@Sun.COM connp->conn_allzones ||
187211042SErik.Nordmark@Sun.COM ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
187311042SErik.Nordmark@Sun.COM (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
187411042SErik.Nordmark@Sun.COM (ira->ira_flags & IRAF_TX_SHARED_ADDR)))
18751676Sjpk break;
18761676Sjpk }
187711042SErik.Nordmark@Sun.COM
187811042SErik.Nordmark@Sun.COM if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
187911042SErik.Nordmark@Sun.COM !tsol_receive_local(mp, dst, ipversion, ira, connp)) {
18801676Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__rawip,
18811676Sjpk char *, "connp(1) could not receive mp(2)",
18821676Sjpk conn_t *, connp, mblk_t *, mp);
18831676Sjpk connp = NULL;
18840Sstevel@tonic-gate }
1885409Skcpoon
1886409Skcpoon if (connp != NULL)
1887409Skcpoon goto found;
1888409Skcpoon mutex_exit(&connfp->connf_lock);
1889409Skcpoon
189011042SErik.Nordmark@Sun.COM /* Try to look for a wildcard SCTP RAW socket match. */
18913448Sdh155122 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)];
1892409Skcpoon mutex_enter(&connfp->connf_lock);
1893409Skcpoon for (connp = connfp->connf_head; connp != NULL;
1894409Skcpoon connp = connp->conn_next) {
1895409Skcpoon /* We don't allow v4 fallback for v6 raw socket. */
189611042SErik.Nordmark@Sun.COM if (ipversion != connp->conn_ipversion)
189711042SErik.Nordmark@Sun.COM continue;
189811042SErik.Nordmark@Sun.COM if (!IPCL_ZONE_MATCH(connp, zoneid))
1899409Skcpoon continue;
190011042SErik.Nordmark@Sun.COM
190111042SErik.Nordmark@Sun.COM if (ipversion == IPV4_VERSION) {
190211042SErik.Nordmark@Sun.COM if (IPCL_RAW_MATCH(connp, protocol, ipha->ipha_dst))
1903409Skcpoon break;
1904409Skcpoon } else {
190511042SErik.Nordmark@Sun.COM if (IPCL_RAW_MATCH_V6(connp, protocol, ip6h->ip6_dst)) {
1906409Skcpoon break;
1907409Skcpoon }
1908409Skcpoon }
19090Sstevel@tonic-gate }
1910409Skcpoon
1911409Skcpoon if (connp != NULL)
1912409Skcpoon goto found;
1913409Skcpoon
19140Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
19150Sstevel@tonic-gate return (NULL);
1916409Skcpoon
1917409Skcpoon found:
1918409Skcpoon ASSERT(connp != NULL);
1919409Skcpoon CONN_INC_REF(connp);
1920409Skcpoon mutex_exit(&connfp->connf_lock);
1921409Skcpoon return (connp);
19220Sstevel@tonic-gate }
19230Sstevel@tonic-gate
19240Sstevel@tonic-gate /* ARGSUSED */
19250Sstevel@tonic-gate static int
tcp_conn_constructor(void * buf,void * cdrarg,int kmflags)19265240Snordmark tcp_conn_constructor(void *buf, void *cdrarg, int kmflags)
19270Sstevel@tonic-gate {
19280Sstevel@tonic-gate itc_t *itc = (itc_t *)buf;
19290Sstevel@tonic-gate conn_t *connp = &itc->itc_conn;
19305240Snordmark tcp_t *tcp = (tcp_t *)&itc[1];
19315240Snordmark
19325240Snordmark bzero(connp, sizeof (conn_t));
19335240Snordmark bzero(tcp, sizeof (tcp_t));
19345240Snordmark
19355240Snordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
19365240Snordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
19378348SEric.Yu@Sun.COM cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL);
193811042SErik.Nordmark@Sun.COM tcp->tcp_timercache = tcp_timermp_alloc(kmflags);
193911042SErik.Nordmark@Sun.COM if (tcp->tcp_timercache == NULL)
194011042SErik.Nordmark@Sun.COM return (ENOMEM);
19410Sstevel@tonic-gate connp->conn_tcp = tcp;
19420Sstevel@tonic-gate connp->conn_flags = IPCL_TCPCONN;
194311042SErik.Nordmark@Sun.COM connp->conn_proto = IPPROTO_TCP;
19440Sstevel@tonic-gate tcp->tcp_connp = connp;
194511042SErik.Nordmark@Sun.COM rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
194611042SErik.Nordmark@Sun.COM
194711042SErik.Nordmark@Sun.COM connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
194811042SErik.Nordmark@Sun.COM if (connp->conn_ixa == NULL) {
194911042SErik.Nordmark@Sun.COM tcp_timermp_free(tcp);
195011042SErik.Nordmark@Sun.COM return (ENOMEM);
195111042SErik.Nordmark@Sun.COM }
195211042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_refcnt = 1;
195311042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_protocol = connp->conn_proto;
195411042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
19550Sstevel@tonic-gate return (0);
19560Sstevel@tonic-gate }
19570Sstevel@tonic-gate
19580Sstevel@tonic-gate /* ARGSUSED */
19590Sstevel@tonic-gate static void
tcp_conn_destructor(void * buf,void * cdrarg)19605240Snordmark tcp_conn_destructor(void *buf, void *cdrarg)
19615240Snordmark {
19625240Snordmark itc_t *itc = (itc_t *)buf;
19635240Snordmark conn_t *connp = &itc->itc_conn;
19645240Snordmark tcp_t *tcp = (tcp_t *)&itc[1];
19655240Snordmark
19665240Snordmark ASSERT(connp->conn_flags & IPCL_TCPCONN);
19675240Snordmark ASSERT(tcp->tcp_connp == connp);
19685240Snordmark ASSERT(connp->conn_tcp == tcp);
19695240Snordmark tcp_timermp_free(tcp);
19705240Snordmark mutex_destroy(&connp->conn_lock);
19715240Snordmark cv_destroy(&connp->conn_cv);
19728348SEric.Yu@Sun.COM cv_destroy(&connp->conn_sq_cv);
197311042SErik.Nordmark@Sun.COM rw_destroy(&connp->conn_ilg_lock);
197411042SErik.Nordmark@Sun.COM
197511042SErik.Nordmark@Sun.COM /* Can be NULL if constructor failed */
197611042SErik.Nordmark@Sun.COM if (connp->conn_ixa != NULL) {
197711042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_refcnt == 1);
197811042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_ire == NULL);
197911042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_nce == NULL);
198011042SErik.Nordmark@Sun.COM ixa_refrele(connp->conn_ixa);
198111042SErik.Nordmark@Sun.COM }
19825240Snordmark }
19835240Snordmark
19845240Snordmark /* ARGSUSED */
19855240Snordmark static int
ip_conn_constructor(void * buf,void * cdrarg,int kmflags)19865240Snordmark ip_conn_constructor(void *buf, void *cdrarg, int kmflags)
19875240Snordmark {
19885240Snordmark itc_t *itc = (itc_t *)buf;
19895240Snordmark conn_t *connp = &itc->itc_conn;
19905240Snordmark
19915240Snordmark bzero(connp, sizeof (conn_t));
19925240Snordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
19935240Snordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
19945240Snordmark connp->conn_flags = IPCL_IPCCONN;
199511042SErik.Nordmark@Sun.COM rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
19965240Snordmark
199711042SErik.Nordmark@Sun.COM connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
199811042SErik.Nordmark@Sun.COM if (connp->conn_ixa == NULL)
199911042SErik.Nordmark@Sun.COM return (ENOMEM);
200011042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_refcnt = 1;
200111042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
20025240Snordmark return (0);
20035240Snordmark }
20045240Snordmark
20055240Snordmark /* ARGSUSED */
20065240Snordmark static void
ip_conn_destructor(void * buf,void * cdrarg)20075240Snordmark ip_conn_destructor(void *buf, void *cdrarg)
20085240Snordmark {
20095240Snordmark itc_t *itc = (itc_t *)buf;
20105240Snordmark conn_t *connp = &itc->itc_conn;
20115240Snordmark
20125240Snordmark ASSERT(connp->conn_flags & IPCL_IPCCONN);
20135240Snordmark ASSERT(connp->conn_priv == NULL);
20145240Snordmark mutex_destroy(&connp->conn_lock);
20155240Snordmark cv_destroy(&connp->conn_cv);
201611042SErik.Nordmark@Sun.COM rw_destroy(&connp->conn_ilg_lock);
201711042SErik.Nordmark@Sun.COM
201811042SErik.Nordmark@Sun.COM /* Can be NULL if constructor failed */
201911042SErik.Nordmark@Sun.COM if (connp->conn_ixa != NULL) {
202011042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_refcnt == 1);
202111042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_ire == NULL);
202211042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_nce == NULL);
202311042SErik.Nordmark@Sun.COM ixa_refrele(connp->conn_ixa);
202411042SErik.Nordmark@Sun.COM }
20255240Snordmark }
20265240Snordmark
20275240Snordmark /* ARGSUSED */
20285240Snordmark static int
udp_conn_constructor(void * buf,void * cdrarg,int kmflags)20295240Snordmark udp_conn_constructor(void *buf, void *cdrarg, int kmflags)
20305240Snordmark {
20315240Snordmark itc_t *itc = (itc_t *)buf;
20325240Snordmark conn_t *connp = &itc->itc_conn;
20335240Snordmark udp_t *udp = (udp_t *)&itc[1];
20345240Snordmark
20355240Snordmark bzero(connp, sizeof (conn_t));
20365240Snordmark bzero(udp, sizeof (udp_t));
20375240Snordmark
20385240Snordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
20395240Snordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
20405240Snordmark connp->conn_udp = udp;
20415240Snordmark connp->conn_flags = IPCL_UDPCONN;
204211042SErik.Nordmark@Sun.COM connp->conn_proto = IPPROTO_UDP;
20435240Snordmark udp->udp_connp = connp;
204411042SErik.Nordmark@Sun.COM rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
204511042SErik.Nordmark@Sun.COM connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
204611042SErik.Nordmark@Sun.COM if (connp->conn_ixa == NULL)
204711042SErik.Nordmark@Sun.COM return (ENOMEM);
204811042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_refcnt = 1;
204911042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_protocol = connp->conn_proto;
205011042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
20515240Snordmark return (0);
20525240Snordmark }
20535240Snordmark
20545240Snordmark /* ARGSUSED */
20555240Snordmark static void
udp_conn_destructor(void * buf,void * cdrarg)20565240Snordmark udp_conn_destructor(void *buf, void *cdrarg)
20575240Snordmark {
20585240Snordmark itc_t *itc = (itc_t *)buf;
20595240Snordmark conn_t *connp = &itc->itc_conn;
20605240Snordmark udp_t *udp = (udp_t *)&itc[1];
20615240Snordmark
20625240Snordmark ASSERT(connp->conn_flags & IPCL_UDPCONN);
20635240Snordmark ASSERT(udp->udp_connp == connp);
20645240Snordmark ASSERT(connp->conn_udp == udp);
20655240Snordmark mutex_destroy(&connp->conn_lock);
20665240Snordmark cv_destroy(&connp->conn_cv);
206711042SErik.Nordmark@Sun.COM rw_destroy(&connp->conn_ilg_lock);
206811042SErik.Nordmark@Sun.COM
206911042SErik.Nordmark@Sun.COM /* Can be NULL if constructor failed */
207011042SErik.Nordmark@Sun.COM if (connp->conn_ixa != NULL) {
207111042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_refcnt == 1);
207211042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_ire == NULL);
207311042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_nce == NULL);
207411042SErik.Nordmark@Sun.COM ixa_refrele(connp->conn_ixa);
207511042SErik.Nordmark@Sun.COM }
20765240Snordmark }
20775240Snordmark
20785240Snordmark /* ARGSUSED */
20795240Snordmark static int
rawip_conn_constructor(void * buf,void * cdrarg,int kmflags)20805240Snordmark rawip_conn_constructor(void *buf, void *cdrarg, int kmflags)
20810Sstevel@tonic-gate {
20825240Snordmark itc_t *itc = (itc_t *)buf;
20835240Snordmark conn_t *connp = &itc->itc_conn;
20845240Snordmark icmp_t *icmp = (icmp_t *)&itc[1];
20855240Snordmark
20865240Snordmark bzero(connp, sizeof (conn_t));
20875240Snordmark bzero(icmp, sizeof (icmp_t));
20885240Snordmark
20895240Snordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
20905240Snordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
20915240Snordmark connp->conn_icmp = icmp;
20925240Snordmark connp->conn_flags = IPCL_RAWIPCONN;
209311042SErik.Nordmark@Sun.COM connp->conn_proto = IPPROTO_ICMP;
20945240Snordmark icmp->icmp_connp = connp;
209511042SErik.Nordmark@Sun.COM rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
209611042SErik.Nordmark@Sun.COM connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
209711042SErik.Nordmark@Sun.COM if (connp->conn_ixa == NULL)
209811042SErik.Nordmark@Sun.COM return (ENOMEM);
209911042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_refcnt = 1;
210011042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_protocol = connp->conn_proto;
210111042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
21025240Snordmark return (0);
21035240Snordmark }
21045240Snordmark
21055240Snordmark /* ARGSUSED */
21065240Snordmark static void
rawip_conn_destructor(void * buf,void * cdrarg)21075240Snordmark rawip_conn_destructor(void *buf, void *cdrarg)
21085240Snordmark {
21095240Snordmark itc_t *itc = (itc_t *)buf;
21105240Snordmark conn_t *connp = &itc->itc_conn;
21115240Snordmark icmp_t *icmp = (icmp_t *)&itc[1];
21125240Snordmark
21135240Snordmark ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
21145240Snordmark ASSERT(icmp->icmp_connp == connp);
21155240Snordmark ASSERT(connp->conn_icmp == icmp);
21165240Snordmark mutex_destroy(&connp->conn_lock);
21175240Snordmark cv_destroy(&connp->conn_cv);
211811042SErik.Nordmark@Sun.COM rw_destroy(&connp->conn_ilg_lock);
211911042SErik.Nordmark@Sun.COM
212011042SErik.Nordmark@Sun.COM /* Can be NULL if constructor failed */
212111042SErik.Nordmark@Sun.COM if (connp->conn_ixa != NULL) {
212211042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_refcnt == 1);
212311042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_ire == NULL);
212411042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_nce == NULL);
212511042SErik.Nordmark@Sun.COM ixa_refrele(connp->conn_ixa);
212611042SErik.Nordmark@Sun.COM }
21275240Snordmark }
21285240Snordmark
21295240Snordmark /* ARGSUSED */
21305240Snordmark static int
rts_conn_constructor(void * buf,void * cdrarg,int kmflags)21315240Snordmark rts_conn_constructor(void *buf, void *cdrarg, int kmflags)
21325240Snordmark {
21335240Snordmark itc_t *itc = (itc_t *)buf;
21345240Snordmark conn_t *connp = &itc->itc_conn;
21355240Snordmark rts_t *rts = (rts_t *)&itc[1];
21365240Snordmark
21375240Snordmark bzero(connp, sizeof (conn_t));
21385240Snordmark bzero(rts, sizeof (rts_t));
21395240Snordmark
21405240Snordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
21415240Snordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
21425240Snordmark connp->conn_rts = rts;
21435240Snordmark connp->conn_flags = IPCL_RTSCONN;
21445240Snordmark rts->rts_connp = connp;
214511042SErik.Nordmark@Sun.COM rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
214611042SErik.Nordmark@Sun.COM connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
214711042SErik.Nordmark@Sun.COM if (connp->conn_ixa == NULL)
214811042SErik.Nordmark@Sun.COM return (ENOMEM);
214911042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_refcnt = 1;
215011042SErik.Nordmark@Sun.COM connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
21515240Snordmark return (0);
21525240Snordmark }
21535240Snordmark
21545240Snordmark /* ARGSUSED */
21555240Snordmark static void
rts_conn_destructor(void * buf,void * cdrarg)21565240Snordmark rts_conn_destructor(void *buf, void *cdrarg)
21575240Snordmark {
21585240Snordmark itc_t *itc = (itc_t *)buf;
21595240Snordmark conn_t *connp = &itc->itc_conn;
21605240Snordmark rts_t *rts = (rts_t *)&itc[1];
21615240Snordmark
21625240Snordmark ASSERT(connp->conn_flags & IPCL_RTSCONN);
21635240Snordmark ASSERT(rts->rts_connp == connp);
21645240Snordmark ASSERT(connp->conn_rts == rts);
21655240Snordmark mutex_destroy(&connp->conn_lock);
21665240Snordmark cv_destroy(&connp->conn_cv);
216711042SErik.Nordmark@Sun.COM rw_destroy(&connp->conn_ilg_lock);
21688444SRao.Shoaib@Sun.COM
216911042SErik.Nordmark@Sun.COM /* Can be NULL if constructor failed */
217011042SErik.Nordmark@Sun.COM if (connp->conn_ixa != NULL) {
217111042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_refcnt == 1);
217211042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_ire == NULL);
217311042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa->ixa_nce == NULL);
217411042SErik.Nordmark@Sun.COM ixa_refrele(connp->conn_ixa);
21758348SEric.Yu@Sun.COM }
21768348SEric.Yu@Sun.COM }
21778348SEric.Yu@Sun.COM
21785240Snordmark /*
21795240Snordmark * Called as part of ipcl_conn_destroy to assert and clear any pointers
21805240Snordmark * in the conn_t.
218111042SErik.Nordmark@Sun.COM *
218211042SErik.Nordmark@Sun.COM * Below we list all the pointers in the conn_t as a documentation aid.
218311042SErik.Nordmark@Sun.COM * The ones that we can not ASSERT to be NULL are #ifdef'ed out.
218411042SErik.Nordmark@Sun.COM * If you add any pointers to the conn_t please add an ASSERT here
218511042SErik.Nordmark@Sun.COM * and #ifdef it out if it can't be actually asserted to be NULL.
218611042SErik.Nordmark@Sun.COM * In any case, we bzero most of the conn_t at the end of the function.
21875240Snordmark */
21885240Snordmark void
ipcl_conn_cleanup(conn_t * connp)21895240Snordmark ipcl_conn_cleanup(conn_t *connp)
21905240Snordmark {
219111042SErik.Nordmark@Sun.COM ip_xmit_attr_t *ixa;
219211042SErik.Nordmark@Sun.COM
21935240Snordmark ASSERT(connp->conn_latch == NULL);
219411042SErik.Nordmark@Sun.COM ASSERT(connp->conn_latch_in_policy == NULL);
219511042SErik.Nordmark@Sun.COM ASSERT(connp->conn_latch_in_action == NULL);
21965240Snordmark #ifdef notdef
21975240Snordmark ASSERT(connp->conn_rq == NULL);
21985240Snordmark ASSERT(connp->conn_wq == NULL);
21995240Snordmark #endif
22005240Snordmark ASSERT(connp->conn_cred == NULL);
22015240Snordmark ASSERT(connp->conn_g_fanout == NULL);
22025240Snordmark ASSERT(connp->conn_g_next == NULL);
22035240Snordmark ASSERT(connp->conn_g_prev == NULL);
22045240Snordmark ASSERT(connp->conn_policy == NULL);
22055240Snordmark ASSERT(connp->conn_fanout == NULL);
22065240Snordmark ASSERT(connp->conn_next == NULL);
22075240Snordmark ASSERT(connp->conn_prev == NULL);
22085240Snordmark ASSERT(connp->conn_oper_pending_ill == NULL);
22095240Snordmark ASSERT(connp->conn_ilg == NULL);
22105240Snordmark ASSERT(connp->conn_drain_next == NULL);
22115240Snordmark ASSERT(connp->conn_drain_prev == NULL);
22125277Snordmark #ifdef notdef
22135277Snordmark /* conn_idl is not cleared when removed from idl list */
22145240Snordmark ASSERT(connp->conn_idl == NULL);
22155277Snordmark #endif
22165240Snordmark ASSERT(connp->conn_ipsec_opt_mp == NULL);
221711042SErik.Nordmark@Sun.COM #ifdef notdef
221811042SErik.Nordmark@Sun.COM /* conn_netstack is cleared by the caller; needed by ixa_cleanup */
22195240Snordmark ASSERT(connp->conn_netstack == NULL);
222011042SErik.Nordmark@Sun.COM #endif
22215240Snordmark
22228348SEric.Yu@Sun.COM ASSERT(connp->conn_helper_info == NULL);
222311042SErik.Nordmark@Sun.COM ASSERT(connp->conn_ixa != NULL);
222411042SErik.Nordmark@Sun.COM ixa = connp->conn_ixa;
222511042SErik.Nordmark@Sun.COM ASSERT(ixa->ixa_refcnt == 1);
222611042SErik.Nordmark@Sun.COM /* Need to preserve ixa_protocol */
222711042SErik.Nordmark@Sun.COM ixa_cleanup(ixa);
222811042SErik.Nordmark@Sun.COM ixa->ixa_flags = 0;
222911042SErik.Nordmark@Sun.COM
22305240Snordmark /* Clear out the conn_t fields that are not preserved */
22315240Snordmark bzero(&connp->conn_start_clr,
22325240Snordmark sizeof (conn_t) -
22335240Snordmark ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp));
22340Sstevel@tonic-gate }
22350Sstevel@tonic-gate
22360Sstevel@tonic-gate /*
22370Sstevel@tonic-gate * All conns are inserted in a global multi-list for the benefit of
22380Sstevel@tonic-gate * walkers. The walk is guaranteed to walk all open conns at the time
22390Sstevel@tonic-gate * of the start of the walk exactly once. This property is needed to
22400Sstevel@tonic-gate * achieve some cleanups during unplumb of interfaces. This is achieved
22410Sstevel@tonic-gate * as follows.
22420Sstevel@tonic-gate *
22430Sstevel@tonic-gate * ipcl_conn_create and ipcl_conn_destroy are the only functions that
22440Sstevel@tonic-gate * call the insert and delete functions below at creation and deletion
22450Sstevel@tonic-gate * time respectively. The conn never moves or changes its position in this
22460Sstevel@tonic-gate * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt
22470Sstevel@tonic-gate * won't increase due to walkers, once the conn deletion has started. Note
22480Sstevel@tonic-gate * that we can't remove the conn from the global list and then wait for
22490Sstevel@tonic-gate * the refcnt to drop to zero, since walkers would then see a truncated
22500Sstevel@tonic-gate * list. CONN_INCIPIENT ensures that walkers don't start looking at
22510Sstevel@tonic-gate * conns until ip_open is ready to make them globally visible.
22520Sstevel@tonic-gate * The global round robin multi-list locks are held only to get the
22530Sstevel@tonic-gate * next member/insertion/deletion and contention should be negligible
22540Sstevel@tonic-gate * if the multi-list is much greater than the number of cpus.
22550Sstevel@tonic-gate */
22560Sstevel@tonic-gate void
ipcl_globalhash_insert(conn_t * connp)22570Sstevel@tonic-gate ipcl_globalhash_insert(conn_t *connp)
22580Sstevel@tonic-gate {
22590Sstevel@tonic-gate int index;
22603448Sdh155122 struct connf_s *connfp;
22613448Sdh155122 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
22620Sstevel@tonic-gate
22630Sstevel@tonic-gate /*
22640Sstevel@tonic-gate * No need for atomic here. Approximate even distribution
22650Sstevel@tonic-gate * in the global lists is sufficient.
22660Sstevel@tonic-gate */
22673448Sdh155122 ipst->ips_conn_g_index++;
22683448Sdh155122 index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1);
22690Sstevel@tonic-gate
22700Sstevel@tonic-gate connp->conn_g_prev = NULL;
22710Sstevel@tonic-gate /*
22720Sstevel@tonic-gate * Mark as INCIPIENT, so that walkers will ignore this
22730Sstevel@tonic-gate * for now, till ip_open is ready to make it visible globally.
22740Sstevel@tonic-gate */
22750Sstevel@tonic-gate connp->conn_state_flags |= CONN_INCIPIENT;
22760Sstevel@tonic-gate
22773448Sdh155122 connfp = &ipst->ips_ipcl_globalhash_fanout[index];
22780Sstevel@tonic-gate /* Insert at the head of the list */
22793448Sdh155122 mutex_enter(&connfp->connf_lock);
22803448Sdh155122 connp->conn_g_next = connfp->connf_head;
22810Sstevel@tonic-gate if (connp->conn_g_next != NULL)
22820Sstevel@tonic-gate connp->conn_g_next->conn_g_prev = connp;
22833448Sdh155122 connfp->connf_head = connp;
22840Sstevel@tonic-gate
22850Sstevel@tonic-gate /* The fanout bucket this conn points to */
22863448Sdh155122 connp->conn_g_fanout = connfp;
22870Sstevel@tonic-gate
22883448Sdh155122 mutex_exit(&connfp->connf_lock);
22890Sstevel@tonic-gate }
22900Sstevel@tonic-gate
22910Sstevel@tonic-gate void
ipcl_globalhash_remove(conn_t * connp)22920Sstevel@tonic-gate ipcl_globalhash_remove(conn_t *connp)
22930Sstevel@tonic-gate {
22943448Sdh155122 struct connf_s *connfp;
22953448Sdh155122
22960Sstevel@tonic-gate /*
22970Sstevel@tonic-gate * We were never inserted in the global multi list.
22980Sstevel@tonic-gate * IPCL_NONE variety is never inserted in the global multilist
22990Sstevel@tonic-gate * since it is presumed to not need any cleanup and is transient.
23000Sstevel@tonic-gate */
23010Sstevel@tonic-gate if (connp->conn_g_fanout == NULL)
23020Sstevel@tonic-gate return;
23030Sstevel@tonic-gate
23043448Sdh155122 connfp = connp->conn_g_fanout;
23053448Sdh155122 mutex_enter(&connfp->connf_lock);
23060Sstevel@tonic-gate if (connp->conn_g_prev != NULL)
23070Sstevel@tonic-gate connp->conn_g_prev->conn_g_next = connp->conn_g_next;
23080Sstevel@tonic-gate else
23093448Sdh155122 connfp->connf_head = connp->conn_g_next;
23100Sstevel@tonic-gate if (connp->conn_g_next != NULL)
23110Sstevel@tonic-gate connp->conn_g_next->conn_g_prev = connp->conn_g_prev;
23123448Sdh155122 mutex_exit(&connfp->connf_lock);
23130Sstevel@tonic-gate
23140Sstevel@tonic-gate /* Better to stumble on a null pointer than to corrupt memory */
23150Sstevel@tonic-gate connp->conn_g_next = NULL;
23160Sstevel@tonic-gate connp->conn_g_prev = NULL;
23175240Snordmark connp->conn_g_fanout = NULL;
23180Sstevel@tonic-gate }
23190Sstevel@tonic-gate
23200Sstevel@tonic-gate /*
23210Sstevel@tonic-gate * Walk the list of all conn_t's in the system, calling the function provided
232211042SErik.Nordmark@Sun.COM * With the specified argument for each.
23230Sstevel@tonic-gate * Applies to both IPv4 and IPv6.
23240Sstevel@tonic-gate *
232511042SErik.Nordmark@Sun.COM * CONNs may hold pointers to ills (conn_dhcpinit_ill and
232611042SErik.Nordmark@Sun.COM * conn_oper_pending_ill). To guard against stale pointers
23270Sstevel@tonic-gate * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is
23280Sstevel@tonic-gate * unplumbed or removed. New conn_t's that are created while we are walking
23290Sstevel@tonic-gate * may be missed by this walk, because they are not necessarily inserted
23300Sstevel@tonic-gate * at the tail of the list. They are new conn_t's and thus don't have any
23310Sstevel@tonic-gate * stale pointers. The CONN_CLOSING flag ensures that no new reference
23320Sstevel@tonic-gate * is created to the struct that is going away.
23330Sstevel@tonic-gate */
23340Sstevel@tonic-gate void
ipcl_walk(pfv_t func,void * arg,ip_stack_t * ipst)23353448Sdh155122 ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst)
23360Sstevel@tonic-gate {
23370Sstevel@tonic-gate int i;
23380Sstevel@tonic-gate conn_t *connp;
23390Sstevel@tonic-gate conn_t *prev_connp;
23400Sstevel@tonic-gate
23410Sstevel@tonic-gate for (i = 0; i < CONN_G_HASH_SIZE; i++) {
23423448Sdh155122 mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
23430Sstevel@tonic-gate prev_connp = NULL;
23443448Sdh155122 connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head;
23450Sstevel@tonic-gate while (connp != NULL) {
23460Sstevel@tonic-gate mutex_enter(&connp->conn_lock);
23470Sstevel@tonic-gate if (connp->conn_state_flags &
23480Sstevel@tonic-gate (CONN_CONDEMNED | CONN_INCIPIENT)) {
23490Sstevel@tonic-gate mutex_exit(&connp->conn_lock);
23500Sstevel@tonic-gate connp = connp->conn_g_next;
23510Sstevel@tonic-gate continue;
23520Sstevel@tonic-gate }
23530Sstevel@tonic-gate CONN_INC_REF_LOCKED(connp);
23540Sstevel@tonic-gate mutex_exit(&connp->conn_lock);
23553448Sdh155122 mutex_exit(
23563448Sdh155122 &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
23570Sstevel@tonic-gate (*func)(connp, arg);
23580Sstevel@tonic-gate if (prev_connp != NULL)
23590Sstevel@tonic-gate CONN_DEC_REF(prev_connp);
23603448Sdh155122 mutex_enter(
23613448Sdh155122 &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
23620Sstevel@tonic-gate prev_connp = connp;
23630Sstevel@tonic-gate connp = connp->conn_g_next;
23640Sstevel@tonic-gate }
23653448Sdh155122 mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
23660Sstevel@tonic-gate if (prev_connp != NULL)
23670Sstevel@tonic-gate CONN_DEC_REF(prev_connp);
23680Sstevel@tonic-gate }
23690Sstevel@tonic-gate }
23700Sstevel@tonic-gate
23710Sstevel@tonic-gate /*
23720Sstevel@tonic-gate * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on
23730Sstevel@tonic-gate * the {src, dst, lport, fport} quadruplet. Returns with conn reference
23740Sstevel@tonic-gate * held; caller must call CONN_DEC_REF. Only checks for connected entries
23752323Sethindra * (peer tcp in ESTABLISHED state).
23760Sstevel@tonic-gate */
23770Sstevel@tonic-gate conn_t *
ipcl_conn_tcp_lookup_reversed_ipv4(conn_t * connp,ipha_t * ipha,tcpha_t * tcpha,ip_stack_t * ipst)237811042SErik.Nordmark@Sun.COM ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcpha_t *tcpha,
23793448Sdh155122 ip_stack_t *ipst)
23800Sstevel@tonic-gate {
23810Sstevel@tonic-gate uint32_t ports;
23820Sstevel@tonic-gate uint16_t *pports = (uint16_t *)&ports;
23830Sstevel@tonic-gate connf_t *connfp;
23840Sstevel@tonic-gate conn_t *tconnp;
23850Sstevel@tonic-gate boolean_t zone_chk;
23860Sstevel@tonic-gate
23870Sstevel@tonic-gate /*
23880Sstevel@tonic-gate * If either the source of destination address is loopback, then
23890Sstevel@tonic-gate * both endpoints must be in the same Zone. Otherwise, both of
23900Sstevel@tonic-gate * the addresses are system-wide unique (tcp is in ESTABLISHED
23910Sstevel@tonic-gate * state) and the endpoints may reside in different Zones.
23920Sstevel@tonic-gate */
23930Sstevel@tonic-gate zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) ||
23940Sstevel@tonic-gate ipha->ipha_dst == htonl(INADDR_LOOPBACK));
23950Sstevel@tonic-gate
239611042SErik.Nordmark@Sun.COM pports[0] = tcpha->tha_fport;
239711042SErik.Nordmark@Sun.COM pports[1] = tcpha->tha_lport;
23980Sstevel@tonic-gate
23993448Sdh155122 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
24003448Sdh155122 ports, ipst)];
24010Sstevel@tonic-gate
24020Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
24030Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL;
24040Sstevel@tonic-gate tconnp = tconnp->conn_next) {
24050Sstevel@tonic-gate
24060Sstevel@tonic-gate if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
24070Sstevel@tonic-gate ipha->ipha_dst, ipha->ipha_src, ports) &&
24082323Sethindra tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
24090Sstevel@tonic-gate (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
24100Sstevel@tonic-gate
24110Sstevel@tonic-gate ASSERT(tconnp != connp);
24120Sstevel@tonic-gate CONN_INC_REF(tconnp);
24130Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
24140Sstevel@tonic-gate return (tconnp);
24150Sstevel@tonic-gate }
24160Sstevel@tonic-gate }
24170Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
24180Sstevel@tonic-gate return (NULL);
24190Sstevel@tonic-gate }
24200Sstevel@tonic-gate
24210Sstevel@tonic-gate /*
24220Sstevel@tonic-gate * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on
24230Sstevel@tonic-gate * the {src, dst, lport, fport} quadruplet. Returns with conn reference
24240Sstevel@tonic-gate * held; caller must call CONN_DEC_REF. Only checks for connected entries
24252323Sethindra * (peer tcp in ESTABLISHED state).
24260Sstevel@tonic-gate */
24270Sstevel@tonic-gate conn_t *
ipcl_conn_tcp_lookup_reversed_ipv6(conn_t * connp,ip6_t * ip6h,tcpha_t * tcpha,ip_stack_t * ipst)242811042SErik.Nordmark@Sun.COM ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcpha_t *tcpha,
24293448Sdh155122 ip_stack_t *ipst)
24300Sstevel@tonic-gate {
24310Sstevel@tonic-gate uint32_t ports;
24320Sstevel@tonic-gate uint16_t *pports = (uint16_t *)&ports;
24330Sstevel@tonic-gate connf_t *connfp;
24340Sstevel@tonic-gate conn_t *tconnp;
24350Sstevel@tonic-gate boolean_t zone_chk;
24360Sstevel@tonic-gate
24370Sstevel@tonic-gate /*
24380Sstevel@tonic-gate * If either the source of destination address is loopback, then
24390Sstevel@tonic-gate * both endpoints must be in the same Zone. Otherwise, both of
24400Sstevel@tonic-gate * the addresses are system-wide unique (tcp is in ESTABLISHED
24410Sstevel@tonic-gate * state) and the endpoints may reside in different Zones. We
24420Sstevel@tonic-gate * don't do Zone check for link local address(es) because the
24430Sstevel@tonic-gate * current Zone implementation treats each link local address as
24440Sstevel@tonic-gate * being unique per system node, i.e. they belong to global Zone.
24450Sstevel@tonic-gate */
24460Sstevel@tonic-gate zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) ||
24470Sstevel@tonic-gate IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst));
24480Sstevel@tonic-gate
244911042SErik.Nordmark@Sun.COM pports[0] = tcpha->tha_fport;
245011042SErik.Nordmark@Sun.COM pports[1] = tcpha->tha_lport;
24510Sstevel@tonic-gate
24523448Sdh155122 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
24533448Sdh155122 ports, ipst)];
24540Sstevel@tonic-gate
24550Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
24560Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL;
24570Sstevel@tonic-gate tconnp = tconnp->conn_next) {
24580Sstevel@tonic-gate
245911042SErik.Nordmark@Sun.COM /* We skip conn_bound_if check here as this is loopback tcp */
24600Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
24610Sstevel@tonic-gate ip6h->ip6_dst, ip6h->ip6_src, ports) &&
24622323Sethindra tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
24630Sstevel@tonic-gate (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
24640Sstevel@tonic-gate
24650Sstevel@tonic-gate ASSERT(tconnp != connp);
24660Sstevel@tonic-gate CONN_INC_REF(tconnp);
24670Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
24680Sstevel@tonic-gate return (tconnp);
24690Sstevel@tonic-gate }
24700Sstevel@tonic-gate }
24710Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
24720Sstevel@tonic-gate return (NULL);
24730Sstevel@tonic-gate }
24740Sstevel@tonic-gate
24750Sstevel@tonic-gate /*
24760Sstevel@tonic-gate * Find an exact {src, dst, lport, fport} match for a bounced datagram.
24770Sstevel@tonic-gate * Returns with conn reference held. Caller must call CONN_DEC_REF.
24780Sstevel@tonic-gate * Only checks for connected entries i.e. no INADDR_ANY checks.
24790Sstevel@tonic-gate */
24800Sstevel@tonic-gate conn_t *
ipcl_tcp_lookup_reversed_ipv4(ipha_t * ipha,tcpha_t * tcpha,int min_state,ip_stack_t * ipst)248111042SErik.Nordmark@Sun.COM ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcpha_t *tcpha, int min_state,
24823448Sdh155122 ip_stack_t *ipst)
24830Sstevel@tonic-gate {
24840Sstevel@tonic-gate uint32_t ports;
24850Sstevel@tonic-gate uint16_t *pports;
24860Sstevel@tonic-gate connf_t *connfp;
24870Sstevel@tonic-gate conn_t *tconnp;
24880Sstevel@tonic-gate
24890Sstevel@tonic-gate pports = (uint16_t *)&ports;
249011042SErik.Nordmark@Sun.COM pports[0] = tcpha->tha_fport;
249111042SErik.Nordmark@Sun.COM pports[1] = tcpha->tha_lport;
24920Sstevel@tonic-gate
24933448Sdh155122 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
24944691Skcpoon ports, ipst)];
24950Sstevel@tonic-gate
24960Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
24970Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL;
24980Sstevel@tonic-gate tconnp = tconnp->conn_next) {
24990Sstevel@tonic-gate
25000Sstevel@tonic-gate if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
25010Sstevel@tonic-gate ipha->ipha_dst, ipha->ipha_src, ports) &&
25020Sstevel@tonic-gate tconnp->conn_tcp->tcp_state >= min_state) {
25030Sstevel@tonic-gate
25040Sstevel@tonic-gate CONN_INC_REF(tconnp);
25050Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
25060Sstevel@tonic-gate return (tconnp);
25070Sstevel@tonic-gate }
25080Sstevel@tonic-gate }
25090Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
25100Sstevel@tonic-gate return (NULL);
25110Sstevel@tonic-gate }
25120Sstevel@tonic-gate
25130Sstevel@tonic-gate /*
25140Sstevel@tonic-gate * Find an exact {src, dst, lport, fport} match for a bounced datagram.
25150Sstevel@tonic-gate * Returns with conn reference held. Caller must call CONN_DEC_REF.
25160Sstevel@tonic-gate * Only checks for connected entries i.e. no INADDR_ANY checks.
25170Sstevel@tonic-gate * Match on ifindex in addition to addresses.
25180Sstevel@tonic-gate */
25190Sstevel@tonic-gate conn_t *
ipcl_tcp_lookup_reversed_ipv6(ip6_t * ip6h,tcpha_t * tcpha,int min_state,uint_t ifindex,ip_stack_t * ipst)25200Sstevel@tonic-gate ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state,
25213448Sdh155122 uint_t ifindex, ip_stack_t *ipst)
25220Sstevel@tonic-gate {
25230Sstevel@tonic-gate tcp_t *tcp;
25240Sstevel@tonic-gate uint32_t ports;
25250Sstevel@tonic-gate uint16_t *pports;
25260Sstevel@tonic-gate connf_t *connfp;
25270Sstevel@tonic-gate conn_t *tconnp;
25280Sstevel@tonic-gate
25290Sstevel@tonic-gate pports = (uint16_t *)&ports;
25300Sstevel@tonic-gate pports[0] = tcpha->tha_fport;
25310Sstevel@tonic-gate pports[1] = tcpha->tha_lport;
25320Sstevel@tonic-gate
25333448Sdh155122 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
25344691Skcpoon ports, ipst)];
25350Sstevel@tonic-gate
25360Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
25370Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL;
25380Sstevel@tonic-gate tconnp = tconnp->conn_next) {
25390Sstevel@tonic-gate
25400Sstevel@tonic-gate tcp = tconnp->conn_tcp;
25410Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
25420Sstevel@tonic-gate ip6h->ip6_dst, ip6h->ip6_src, ports) &&
25430Sstevel@tonic-gate tcp->tcp_state >= min_state &&
254411042SErik.Nordmark@Sun.COM (tconnp->conn_bound_if == 0 ||
254511042SErik.Nordmark@Sun.COM tconnp->conn_bound_if == ifindex)) {
25460Sstevel@tonic-gate
25470Sstevel@tonic-gate CONN_INC_REF(tconnp);
25480Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
25490Sstevel@tonic-gate return (tconnp);
25500Sstevel@tonic-gate }
25510Sstevel@tonic-gate }
25520Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
25530Sstevel@tonic-gate return (NULL);
25540Sstevel@tonic-gate }
25550Sstevel@tonic-gate
25560Sstevel@tonic-gate /*
25571676Sjpk * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate
25581676Sjpk * a listener when changing state.
25590Sstevel@tonic-gate */
25600Sstevel@tonic-gate conn_t *
ipcl_lookup_listener_v4(uint16_t lport,ipaddr_t laddr,zoneid_t zoneid,ip_stack_t * ipst)25613448Sdh155122 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid,
25623448Sdh155122 ip_stack_t *ipst)
25630Sstevel@tonic-gate {
25640Sstevel@tonic-gate connf_t *bind_connfp;
25650Sstevel@tonic-gate conn_t *connp;
25660Sstevel@tonic-gate tcp_t *tcp;
25670Sstevel@tonic-gate
25680Sstevel@tonic-gate /*
25690Sstevel@tonic-gate * Avoid false matches for packets sent to an IP destination of
25700Sstevel@tonic-gate * all zeros.
25710Sstevel@tonic-gate */
25720Sstevel@tonic-gate if (laddr == 0)
25730Sstevel@tonic-gate return (NULL);
25740Sstevel@tonic-gate
25751676Sjpk ASSERT(zoneid != ALL_ZONES);
25761676Sjpk
25773448Sdh155122 bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
25780Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock);
25790Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL;
25800Sstevel@tonic-gate connp = connp->conn_next) {
25810Sstevel@tonic-gate tcp = connp->conn_tcp;
25820Sstevel@tonic-gate if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) &&
25832263Ssommerfe IPCL_ZONE_MATCH(connp, zoneid) &&
25840Sstevel@tonic-gate (tcp->tcp_listener == NULL)) {
25850Sstevel@tonic-gate CONN_INC_REF(connp);
25860Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock);
25870Sstevel@tonic-gate return (connp);
25880Sstevel@tonic-gate }
25890Sstevel@tonic-gate }
25900Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock);
25910Sstevel@tonic-gate return (NULL);
25920Sstevel@tonic-gate }
25930Sstevel@tonic-gate
25941676Sjpk /*
25951676Sjpk * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate
25961676Sjpk * a listener when changing state.
25971676Sjpk */
25980Sstevel@tonic-gate conn_t *
ipcl_lookup_listener_v6(uint16_t lport,in6_addr_t * laddr,uint_t ifindex,zoneid_t zoneid,ip_stack_t * ipst)25990Sstevel@tonic-gate ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex,
26003448Sdh155122 zoneid_t zoneid, ip_stack_t *ipst)
26010Sstevel@tonic-gate {
26020Sstevel@tonic-gate connf_t *bind_connfp;
26030Sstevel@tonic-gate conn_t *connp = NULL;
26040Sstevel@tonic-gate tcp_t *tcp;
26050Sstevel@tonic-gate
26060Sstevel@tonic-gate /*
26070Sstevel@tonic-gate * Avoid false matches for packets sent to an IP destination of
26080Sstevel@tonic-gate * all zeros.
26090Sstevel@tonic-gate */
26100Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED(laddr))
26110Sstevel@tonic-gate return (NULL);
26120Sstevel@tonic-gate
26131676Sjpk ASSERT(zoneid != ALL_ZONES);
26140Sstevel@tonic-gate
26153448Sdh155122 bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
26160Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock);
26170Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL;
26180Sstevel@tonic-gate connp = connp->conn_next) {
26190Sstevel@tonic-gate tcp = connp->conn_tcp;
26200Sstevel@tonic-gate if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) &&
26212263Ssommerfe IPCL_ZONE_MATCH(connp, zoneid) &&
262211042SErik.Nordmark@Sun.COM (connp->conn_bound_if == 0 ||
262311042SErik.Nordmark@Sun.COM connp->conn_bound_if == ifindex) &&
26240Sstevel@tonic-gate tcp->tcp_listener == NULL) {
26250Sstevel@tonic-gate CONN_INC_REF(connp);
26260Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock);
26270Sstevel@tonic-gate return (connp);
26280Sstevel@tonic-gate }
26290Sstevel@tonic-gate }
26300Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock);
26310Sstevel@tonic-gate return (NULL);
26320Sstevel@tonic-gate }
26330Sstevel@tonic-gate
2634741Smasputra /*
2635741Smasputra * ipcl_get_next_conn
2636741Smasputra * get the next entry in the conn global list
2637741Smasputra * and put a reference on the next_conn.
2638741Smasputra * decrement the reference on the current conn.
2639741Smasputra *
2640741Smasputra * This is an iterator based walker function that also provides for
2641741Smasputra * some selection by the caller. It walks through the conn_hash bucket
2642741Smasputra * searching for the next valid connp in the list, and selects connections
2643741Smasputra * that are neither closed nor condemned. It also REFHOLDS the conn
2644741Smasputra * thus ensuring that the conn exists when the caller uses the conn.
2645741Smasputra */
2646741Smasputra conn_t *
ipcl_get_next_conn(connf_t * connfp,conn_t * connp,uint32_t conn_flags)2647741Smasputra ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags)
2648741Smasputra {
2649741Smasputra conn_t *next_connp;
2650741Smasputra
2651741Smasputra if (connfp == NULL)
2652741Smasputra return (NULL);
2653741Smasputra
2654741Smasputra mutex_enter(&connfp->connf_lock);
2655741Smasputra
2656741Smasputra next_connp = (connp == NULL) ?
2657741Smasputra connfp->connf_head : connp->conn_g_next;
2658741Smasputra
2659741Smasputra while (next_connp != NULL) {
2660741Smasputra mutex_enter(&next_connp->conn_lock);
2661741Smasputra if (!(next_connp->conn_flags & conn_flags) ||
2662741Smasputra (next_connp->conn_state_flags &
2663741Smasputra (CONN_CONDEMNED | CONN_INCIPIENT))) {
2664741Smasputra /*
2665741Smasputra * This conn has been condemned or
2666741Smasputra * is closing, or the flags don't match
2667741Smasputra */
2668741Smasputra mutex_exit(&next_connp->conn_lock);
2669741Smasputra next_connp = next_connp->conn_g_next;
2670741Smasputra continue;
2671741Smasputra }
2672741Smasputra CONN_INC_REF_LOCKED(next_connp);
2673741Smasputra mutex_exit(&next_connp->conn_lock);
2674741Smasputra break;
2675741Smasputra }
2676741Smasputra
2677741Smasputra mutex_exit(&connfp->connf_lock);
2678741Smasputra
2679741Smasputra if (connp != NULL)
2680741Smasputra CONN_DEC_REF(connp);
2681741Smasputra
2682741Smasputra return (next_connp);
2683741Smasputra }
2684741Smasputra
26850Sstevel@tonic-gate #ifdef CONN_DEBUG
26860Sstevel@tonic-gate /*
26870Sstevel@tonic-gate * Trace of the last NBUF refhold/refrele
26880Sstevel@tonic-gate */
26890Sstevel@tonic-gate int
conn_trace_ref(conn_t * connp)26900Sstevel@tonic-gate conn_trace_ref(conn_t *connp)
26910Sstevel@tonic-gate {
26920Sstevel@tonic-gate int last;
26930Sstevel@tonic-gate conn_trace_t *ctb;
26940Sstevel@tonic-gate
26950Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock));
26960Sstevel@tonic-gate last = connp->conn_trace_last;
26970Sstevel@tonic-gate last++;
26980Sstevel@tonic-gate if (last == CONN_TRACE_MAX)
26990Sstevel@tonic-gate last = 0;
27000Sstevel@tonic-gate
27010Sstevel@tonic-gate ctb = &connp->conn_trace_buf[last];
27025023Scarlsonj ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
27030Sstevel@tonic-gate connp->conn_trace_last = last;
27040Sstevel@tonic-gate return (1);
27050Sstevel@tonic-gate }
27060Sstevel@tonic-gate
27070Sstevel@tonic-gate int
conn_untrace_ref(conn_t * connp)27080Sstevel@tonic-gate conn_untrace_ref(conn_t *connp)
27090Sstevel@tonic-gate {
27100Sstevel@tonic-gate int last;
27110Sstevel@tonic-gate conn_trace_t *ctb;
27120Sstevel@tonic-gate
27130Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock));
27140Sstevel@tonic-gate last = connp->conn_trace_last;
27150Sstevel@tonic-gate last++;
27160Sstevel@tonic-gate if (last == CONN_TRACE_MAX)
27170Sstevel@tonic-gate last = 0;
27180Sstevel@tonic-gate
27190Sstevel@tonic-gate ctb = &connp->conn_trace_buf[last];
27205023Scarlsonj ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
27210Sstevel@tonic-gate connp->conn_trace_last = last;
27220Sstevel@tonic-gate return (1);
27230Sstevel@tonic-gate }
27240Sstevel@tonic-gate #endif
2725