/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */
2511754SKacheong.Poon@Sun.COM
2611754SKacheong.Poon@Sun.COM #include <sys/types.h>
2711754SKacheong.Poon@Sun.COM #include <sys/strlog.h>
2811754SKacheong.Poon@Sun.COM #include <sys/policy.h>
2911754SKacheong.Poon@Sun.COM #include <sys/strsun.h>
3011754SKacheong.Poon@Sun.COM #include <sys/squeue_impl.h>
3111754SKacheong.Poon@Sun.COM #include <sys/squeue.h>
3211754SKacheong.Poon@Sun.COM
3311754SKacheong.Poon@Sun.COM #include <inet/common.h>
3411754SKacheong.Poon@Sun.COM #include <inet/ip.h>
3511754SKacheong.Poon@Sun.COM #include <inet/tcp.h>
3611754SKacheong.Poon@Sun.COM #include <inet/tcp_impl.h>
3711754SKacheong.Poon@Sun.COM
3811754SKacheong.Poon@Sun.COM /* Control whether TCP can enter defensive mode when under memory pressure. */
3911754SKacheong.Poon@Sun.COM static boolean_t tcp_do_reclaim = B_TRUE;
4011754SKacheong.Poon@Sun.COM
4111754SKacheong.Poon@Sun.COM /*
4211754SKacheong.Poon@Sun.COM * Routines related to the TCP_IOC_ABORT_CONN ioctl command.
4311754SKacheong.Poon@Sun.COM *
4411754SKacheong.Poon@Sun.COM * TCP_IOC_ABORT_CONN is a non-transparent ioctl command used for aborting
4511754SKacheong.Poon@Sun.COM * TCP connections. To invoke this ioctl, a tcp_ioc_abort_conn_t structure
4611754SKacheong.Poon@Sun.COM * (defined in tcp.h) needs to be filled in and passed into the kernel
4711754SKacheong.Poon@Sun.COM * via an I_STR ioctl command (see streamio(7I)). The tcp_ioc_abort_conn_t
4811754SKacheong.Poon@Sun.COM * structure contains the four-tuple of a TCP connection and a range of TCP
4911754SKacheong.Poon@Sun.COM * states (specified by ac_start and ac_end). The use of wildcard addresses
5011754SKacheong.Poon@Sun.COM * and ports is allowed. Connections with a matching four tuple and a state
5111754SKacheong.Poon@Sun.COM * within the specified range will be aborted. The valid states for the
5211754SKacheong.Poon@Sun.COM * ac_start and ac_end fields are in the range TCPS_SYN_SENT to TCPS_TIME_WAIT,
5311754SKacheong.Poon@Sun.COM * inclusive.
5411754SKacheong.Poon@Sun.COM *
5511754SKacheong.Poon@Sun.COM * An application which has its connection aborted by this ioctl will receive
5611754SKacheong.Poon@Sun.COM * an error that is dependent on the connection state at the time of the abort.
5711754SKacheong.Poon@Sun.COM * If the connection state is < TCPS_TIME_WAIT, an application should behave as
5811754SKacheong.Poon@Sun.COM * though a RST packet has been received. If the connection state is equal to
5911754SKacheong.Poon@Sun.COM * TCPS_TIME_WAIT, the 2MSL timeout will immediately be canceled by the kernel
6011754SKacheong.Poon@Sun.COM * and all resources associated with the connection will be freed.
6111754SKacheong.Poon@Sun.COM */
6211754SKacheong.Poon@Sun.COM static mblk_t *tcp_ioctl_abort_build_msg(tcp_ioc_abort_conn_t *, tcp_t *);
6311754SKacheong.Poon@Sun.COM static void tcp_ioctl_abort_dump(tcp_ioc_abort_conn_t *);
6411754SKacheong.Poon@Sun.COM static void tcp_ioctl_abort_handler(void *arg, mblk_t *mp, void *arg2,
6511754SKacheong.Poon@Sun.COM ip_recv_attr_t *dummy);
6611754SKacheong.Poon@Sun.COM static int tcp_ioctl_abort(tcp_ioc_abort_conn_t *, tcp_stack_t *tcps);
6711754SKacheong.Poon@Sun.COM void tcp_ioctl_abort_conn(queue_t *, mblk_t *);
6811754SKacheong.Poon@Sun.COM static int tcp_ioctl_abort_bucket(tcp_ioc_abort_conn_t *, int, int *,
6911754SKacheong.Poon@Sun.COM boolean_t, tcp_stack_t *);
7011754SKacheong.Poon@Sun.COM
/*
 * Accessors for the sockaddr structures stored in the ac_local and
 * ac_remote members of a tcp_ioc_abort_conn_t.  The *LADDR/*RADDR macros
 * yield a typed pointer to the local/remote sockaddr; the remaining macros
 * yield the address or port member itself and are usable as lvalues.
 */

/* IPv4 (sin_t) view */
#define	TCP_AC_V4LADDR(acp)	((sin_t *)&(acp)->ac_local)
#define	TCP_AC_V4RADDR(acp)	((sin_t *)&(acp)->ac_remote)
#define	TCP_AC_V4LOCAL(acp)	(TCP_AC_V4LADDR(acp)->sin_addr.s_addr)
#define	TCP_AC_V4REMOTE(acp)	(TCP_AC_V4RADDR(acp)->sin_addr.s_addr)
#define	TCP_AC_V4LPORT(acp)	(TCP_AC_V4LADDR(acp)->sin_port)
#define	TCP_AC_V4RPORT(acp)	(TCP_AC_V4RADDR(acp)->sin_port)

/* IPv6 (sin6_t) view */
#define	TCP_AC_V6LADDR(acp)	((sin6_t *)&(acp)->ac_local)
#define	TCP_AC_V6RADDR(acp)	((sin6_t *)&(acp)->ac_remote)
#define	TCP_AC_V6LOCAL(acp)	(TCP_AC_V6LADDR(acp)->sin6_addr)
#define	TCP_AC_V6REMOTE(acp)	(TCP_AC_V6RADDR(acp)->sin6_addr)
#define	TCP_AC_V6LPORT(acp)	(TCP_AC_V6LADDR(acp)->sin6_port)
#define	TCP_AC_V6RPORT(acp)	(TCP_AC_V6RADDR(acp)->sin6_port)
8711754SKacheong.Poon@Sun.COM
/*
 * Return the correct error code to mimic the behavior
 * of a connection reset.
 *
 *	state	TCP state of the connection being aborted.
 *	err	lvalue that receives the error code:
 *		  ECONNREFUSED	for TCPS_SYN_SENT and TCPS_SYN_RCVD,
 *		  ECONNRESET	for TCPS_ESTABLISHED, TCPS_FIN_WAIT_1,
 *				TCPS_FIN_WAIT_2 and TCPS_CLOSE_WAIT,
 *		  0		for TCPS_CLOSING, TCPS_LAST_ACK and
 *				TCPS_TIME_WAIT,
 *		  ENXIO		for any other state.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely in an unbraced if/else.
 */
#define	TCP_AC_GET_ERRCODE(state, err)	do {				\
	switch ((state)) {						\
	case TCPS_SYN_SENT:						\
	case TCPS_SYN_RCVD:						\
		(err) = ECONNREFUSED;					\
		break;							\
	case TCPS_ESTABLISHED:						\
	case TCPS_FIN_WAIT_1:						\
	case TCPS_FIN_WAIT_2:						\
	case TCPS_CLOSE_WAIT:						\
		(err) = ECONNRESET;					\
		break;							\
	case TCPS_CLOSING:						\
	case TCPS_LAST_ACK:						\
	case TCPS_TIME_WAIT:						\
		(err) = 0;						\
		break;							\
	default:							\
		(err) = ENXIO;						\
		break;							\
	}								\
} while (0)
11311754SKacheong.Poon@Sun.COM
/*
 * Matching of a connection (connp/tcp) against the criteria in acp.
 *
 * An address or port in acp that is unspecified (INADDR_ANY, the IPv6
 * unspecified address, or port 0) acts as a wildcard for that field.
 * All of these macros evaluate their arguments more than once.
 */

/* The connection's state lies within [ac_start, ac_end]. */
#define	TCP_AC_STATE_MATCH(acp, tcp)					\
	((acp)->ac_start <= (tcp)->tcp_state &&				\
	(acp)->ac_end >= (tcp)->tcp_state)

/* Four-tuple comparison using the IPv4 view of acp. */
#define	TCP_AC_V4_MATCH(acp, connp)					\
	((TCP_AC_V4LOCAL((acp)) == INADDR_ANY ||			\
	TCP_AC_V4LOCAL((acp)) == (connp)->conn_laddr_v4) &&		\
	(TCP_AC_V4REMOTE((acp)) == INADDR_ANY ||			\
	TCP_AC_V4REMOTE((acp)) == (connp)->conn_faddr_v4) &&		\
	(TCP_AC_V4LPORT((acp)) == 0 ||					\
	TCP_AC_V4LPORT((acp)) == (connp)->conn_lport) &&		\
	(TCP_AC_V4RPORT((acp)) == 0 ||					\
	TCP_AC_V4RPORT((acp)) == (connp)->conn_fport))

/* Four-tuple comparison using the IPv6 view of acp. */
#define	TCP_AC_V6_MATCH(acp, connp)					\
	((IN6_IS_ADDR_UNSPECIFIED(&TCP_AC_V6LOCAL((acp))) ||		\
	IN6_ARE_ADDR_EQUAL(&TCP_AC_V6LOCAL((acp)),			\
	&(connp)->conn_laddr_v6)) &&					\
	(IN6_IS_ADDR_UNSPECIFIED(&TCP_AC_V6REMOTE((acp))) ||		\
	IN6_ARE_ADDR_EQUAL(&TCP_AC_V6REMOTE((acp)),			\
	&(connp)->conn_faddr_v6)) &&					\
	(TCP_AC_V6LPORT((acp)) == 0 ||					\
	TCP_AC_V6LPORT((acp)) == (connp)->conn_lport) &&		\
	(TCP_AC_V6RPORT((acp)) == 0 ||					\
	TCP_AC_V6RPORT((acp)) == (connp)->conn_fport))

/*
 * Check if a tcp structure matches the info in acp: the four-tuple is
 * compared according to the address family of ac_local (AF_INET selects
 * the IPv4 comparison, anything else the IPv6 one) and the state must be
 * within range.
 */
#define	TCP_AC_ADDR_MATCH(acp, connp, tcp)				\
	((((acp)->ac_local.ss_family == AF_INET) ?			\
	TCP_AC_V4_MATCH(acp, connp) : TCP_AC_V6_MATCH(acp, connp)) &&	\
	TCP_AC_STATE_MATCH(acp, tcp))

/*
 * As TCP_AC_ADDR_MATCH, but additionally requires that ac_zoneid is
 * either ALL_ZONES or equal to the connection's zone.
 */
#define	TCP_AC_MATCH(acp, connp, tcp)					\
	(((acp)->ac_zoneid == ALL_ZONES ||				\
	(acp)->ac_zoneid == (connp)->conn_zoneid) &&			\
	TCP_AC_ADDR_MATCH(acp, connp, tcp))
14611754SKacheong.Poon@Sun.COM
14711754SKacheong.Poon@Sun.COM /*
14811754SKacheong.Poon@Sun.COM * Build a message containing a tcp_ioc_abort_conn_t structure
14911754SKacheong.Poon@Sun.COM * which is filled in with information from acp and tp.
15011754SKacheong.Poon@Sun.COM */
15111754SKacheong.Poon@Sun.COM static mblk_t *
tcp_ioctl_abort_build_msg(tcp_ioc_abort_conn_t * acp,tcp_t * tp)15211754SKacheong.Poon@Sun.COM tcp_ioctl_abort_build_msg(tcp_ioc_abort_conn_t *acp, tcp_t *tp)
15311754SKacheong.Poon@Sun.COM {
15411754SKacheong.Poon@Sun.COM mblk_t *mp;
15511754SKacheong.Poon@Sun.COM tcp_ioc_abort_conn_t *tacp;
15611754SKacheong.Poon@Sun.COM
15711754SKacheong.Poon@Sun.COM mp = allocb(sizeof (uint32_t) + sizeof (*acp), BPRI_LO);
15811754SKacheong.Poon@Sun.COM if (mp == NULL)
15911754SKacheong.Poon@Sun.COM return (NULL);
16011754SKacheong.Poon@Sun.COM
16111754SKacheong.Poon@Sun.COM *((uint32_t *)mp->b_rptr) = TCP_IOC_ABORT_CONN;
16211754SKacheong.Poon@Sun.COM tacp = (tcp_ioc_abort_conn_t *)((uchar_t *)mp->b_rptr +
16311754SKacheong.Poon@Sun.COM sizeof (uint32_t));
16411754SKacheong.Poon@Sun.COM
16511754SKacheong.Poon@Sun.COM tacp->ac_start = acp->ac_start;
16611754SKacheong.Poon@Sun.COM tacp->ac_end = acp->ac_end;
16711754SKacheong.Poon@Sun.COM tacp->ac_zoneid = acp->ac_zoneid;
16811754SKacheong.Poon@Sun.COM
16911754SKacheong.Poon@Sun.COM if (acp->ac_local.ss_family == AF_INET) {
17011754SKacheong.Poon@Sun.COM tacp->ac_local.ss_family = AF_INET;
17111754SKacheong.Poon@Sun.COM tacp->ac_remote.ss_family = AF_INET;
17211754SKacheong.Poon@Sun.COM TCP_AC_V4LOCAL(tacp) = tp->tcp_connp->conn_laddr_v4;
17311754SKacheong.Poon@Sun.COM TCP_AC_V4REMOTE(tacp) = tp->tcp_connp->conn_faddr_v4;
17411754SKacheong.Poon@Sun.COM TCP_AC_V4LPORT(tacp) = tp->tcp_connp->conn_lport;
17511754SKacheong.Poon@Sun.COM TCP_AC_V4RPORT(tacp) = tp->tcp_connp->conn_fport;
17611754SKacheong.Poon@Sun.COM } else {
17711754SKacheong.Poon@Sun.COM tacp->ac_local.ss_family = AF_INET6;
17811754SKacheong.Poon@Sun.COM tacp->ac_remote.ss_family = AF_INET6;
17911754SKacheong.Poon@Sun.COM TCP_AC_V6LOCAL(tacp) = tp->tcp_connp->conn_laddr_v6;
18011754SKacheong.Poon@Sun.COM TCP_AC_V6REMOTE(tacp) = tp->tcp_connp->conn_faddr_v6;
18111754SKacheong.Poon@Sun.COM TCP_AC_V6LPORT(tacp) = tp->tcp_connp->conn_lport;
18211754SKacheong.Poon@Sun.COM TCP_AC_V6RPORT(tacp) = tp->tcp_connp->conn_fport;
18311754SKacheong.Poon@Sun.COM }
18411754SKacheong.Poon@Sun.COM mp->b_wptr = (uchar_t *)mp->b_rptr + sizeof (uint32_t) + sizeof (*acp);
18511754SKacheong.Poon@Sun.COM return (mp);
18611754SKacheong.Poon@Sun.COM }
18711754SKacheong.Poon@Sun.COM
18811754SKacheong.Poon@Sun.COM /*
18911754SKacheong.Poon@Sun.COM * Print a tcp_ioc_abort_conn_t structure.
19011754SKacheong.Poon@Sun.COM */
19111754SKacheong.Poon@Sun.COM static void
tcp_ioctl_abort_dump(tcp_ioc_abort_conn_t * acp)19211754SKacheong.Poon@Sun.COM tcp_ioctl_abort_dump(tcp_ioc_abort_conn_t *acp)
19311754SKacheong.Poon@Sun.COM {
19411754SKacheong.Poon@Sun.COM char lbuf[128];
19511754SKacheong.Poon@Sun.COM char rbuf[128];
19611754SKacheong.Poon@Sun.COM sa_family_t af;
19711754SKacheong.Poon@Sun.COM in_port_t lport, rport;
19811754SKacheong.Poon@Sun.COM ushort_t logflags;
19911754SKacheong.Poon@Sun.COM
20011754SKacheong.Poon@Sun.COM af = acp->ac_local.ss_family;
20111754SKacheong.Poon@Sun.COM
20211754SKacheong.Poon@Sun.COM if (af == AF_INET) {
20311754SKacheong.Poon@Sun.COM (void) inet_ntop(af, (const void *)&TCP_AC_V4LOCAL(acp),
20411754SKacheong.Poon@Sun.COM lbuf, 128);
20511754SKacheong.Poon@Sun.COM (void) inet_ntop(af, (const void *)&TCP_AC_V4REMOTE(acp),
20611754SKacheong.Poon@Sun.COM rbuf, 128);
20711754SKacheong.Poon@Sun.COM lport = ntohs(TCP_AC_V4LPORT(acp));
20811754SKacheong.Poon@Sun.COM rport = ntohs(TCP_AC_V4RPORT(acp));
20911754SKacheong.Poon@Sun.COM } else {
21011754SKacheong.Poon@Sun.COM (void) inet_ntop(af, (const void *)&TCP_AC_V6LOCAL(acp),
21111754SKacheong.Poon@Sun.COM lbuf, 128);
21211754SKacheong.Poon@Sun.COM (void) inet_ntop(af, (const void *)&TCP_AC_V6REMOTE(acp),
21311754SKacheong.Poon@Sun.COM rbuf, 128);
21411754SKacheong.Poon@Sun.COM lport = ntohs(TCP_AC_V6LPORT(acp));
21511754SKacheong.Poon@Sun.COM rport = ntohs(TCP_AC_V6RPORT(acp));
21611754SKacheong.Poon@Sun.COM }
21711754SKacheong.Poon@Sun.COM
21811754SKacheong.Poon@Sun.COM logflags = SL_TRACE | SL_NOTE;
21911754SKacheong.Poon@Sun.COM /*
22011754SKacheong.Poon@Sun.COM * Don't print this message to the console if the operation was done
22111754SKacheong.Poon@Sun.COM * to a non-global zone.
22211754SKacheong.Poon@Sun.COM */
22311754SKacheong.Poon@Sun.COM if (acp->ac_zoneid == GLOBAL_ZONEID || acp->ac_zoneid == ALL_ZONES)
22411754SKacheong.Poon@Sun.COM logflags |= SL_CONSOLE;
22511754SKacheong.Poon@Sun.COM (void) strlog(TCP_MOD_ID, 0, 1, logflags,
22611754SKacheong.Poon@Sun.COM "TCP_IOC_ABORT_CONN: local = %s:%d, remote = %s:%d, "
22711754SKacheong.Poon@Sun.COM "start = %d, end = %d\n", lbuf, lport, rbuf, rport,
22811754SKacheong.Poon@Sun.COM acp->ac_start, acp->ac_end);
22911754SKacheong.Poon@Sun.COM }
23011754SKacheong.Poon@Sun.COM
23111754SKacheong.Poon@Sun.COM /*
23211754SKacheong.Poon@Sun.COM * Called using SQ_FILL when a message built using
23311754SKacheong.Poon@Sun.COM * tcp_ioctl_abort_build_msg is put into a queue.
23411754SKacheong.Poon@Sun.COM * Note that when we get here there is no wildcard in acp any more.
23511754SKacheong.Poon@Sun.COM */
23611754SKacheong.Poon@Sun.COM /* ARGSUSED2 */
23711754SKacheong.Poon@Sun.COM static void
tcp_ioctl_abort_handler(void * arg,mblk_t * mp,void * arg2,ip_recv_attr_t * dummy)23811754SKacheong.Poon@Sun.COM tcp_ioctl_abort_handler(void *arg, mblk_t *mp, void *arg2,
23911754SKacheong.Poon@Sun.COM ip_recv_attr_t *dummy)
24011754SKacheong.Poon@Sun.COM {
24111754SKacheong.Poon@Sun.COM conn_t *connp = (conn_t *)arg;
24211754SKacheong.Poon@Sun.COM tcp_t *tcp = connp->conn_tcp;
24311754SKacheong.Poon@Sun.COM tcp_ioc_abort_conn_t *acp;
24411754SKacheong.Poon@Sun.COM
24511754SKacheong.Poon@Sun.COM /*
24611754SKacheong.Poon@Sun.COM * Don't accept any input on a closed tcp as this TCP logically does
24711754SKacheong.Poon@Sun.COM * not exist on the system. Don't proceed further with this TCP.
24811754SKacheong.Poon@Sun.COM * For eg. this packet could trigger another close of this tcp
24911754SKacheong.Poon@Sun.COM * which would be disastrous for tcp_refcnt. tcp_close_detached /
25011754SKacheong.Poon@Sun.COM * tcp_clean_death / tcp_closei_local must be called at most once
25111754SKacheong.Poon@Sun.COM * on a TCP.
25211754SKacheong.Poon@Sun.COM */
25311754SKacheong.Poon@Sun.COM if (tcp->tcp_state == TCPS_CLOSED ||
25411754SKacheong.Poon@Sun.COM tcp->tcp_state == TCPS_BOUND) {
25511754SKacheong.Poon@Sun.COM freemsg(mp);
25611754SKacheong.Poon@Sun.COM return;
25711754SKacheong.Poon@Sun.COM }
25811754SKacheong.Poon@Sun.COM
25911754SKacheong.Poon@Sun.COM acp = (tcp_ioc_abort_conn_t *)(mp->b_rptr + sizeof (uint32_t));
26011754SKacheong.Poon@Sun.COM if (tcp->tcp_state <= acp->ac_end) {
26111754SKacheong.Poon@Sun.COM /*
26211754SKacheong.Poon@Sun.COM * If we get here, we are already on the correct
26311754SKacheong.Poon@Sun.COM * squeue. This ioctl follows the following path
26411754SKacheong.Poon@Sun.COM * tcp_wput -> tcp_wput_ioctl -> tcp_ioctl_abort_conn
26511754SKacheong.Poon@Sun.COM * ->tcp_ioctl_abort->squeue_enter (if on a
26611754SKacheong.Poon@Sun.COM * different squeue)
26711754SKacheong.Poon@Sun.COM */
26811754SKacheong.Poon@Sun.COM int errcode;
26911754SKacheong.Poon@Sun.COM
27011754SKacheong.Poon@Sun.COM TCP_AC_GET_ERRCODE(tcp->tcp_state, errcode);
27111754SKacheong.Poon@Sun.COM (void) tcp_clean_death(tcp, errcode);
27211754SKacheong.Poon@Sun.COM }
27311754SKacheong.Poon@Sun.COM freemsg(mp);
27411754SKacheong.Poon@Sun.COM }
27511754SKacheong.Poon@Sun.COM
27611754SKacheong.Poon@Sun.COM /*
27711754SKacheong.Poon@Sun.COM * Abort all matching connections on a hash chain.
27811754SKacheong.Poon@Sun.COM */
27911754SKacheong.Poon@Sun.COM static int
tcp_ioctl_abort_bucket(tcp_ioc_abort_conn_t * acp,int index,int * count,boolean_t exact,tcp_stack_t * tcps)28011754SKacheong.Poon@Sun.COM tcp_ioctl_abort_bucket(tcp_ioc_abort_conn_t *acp, int index, int *count,
28111754SKacheong.Poon@Sun.COM boolean_t exact, tcp_stack_t *tcps)
28211754SKacheong.Poon@Sun.COM {
28311754SKacheong.Poon@Sun.COM int nmatch, err = 0;
28411754SKacheong.Poon@Sun.COM tcp_t *tcp;
28511754SKacheong.Poon@Sun.COM MBLKP mp, last, listhead = NULL;
28611754SKacheong.Poon@Sun.COM conn_t *tconnp;
28711754SKacheong.Poon@Sun.COM connf_t *connfp;
28811754SKacheong.Poon@Sun.COM ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip;
28911754SKacheong.Poon@Sun.COM
29011754SKacheong.Poon@Sun.COM connfp = &ipst->ips_ipcl_conn_fanout[index];
29111754SKacheong.Poon@Sun.COM
29211754SKacheong.Poon@Sun.COM startover:
29311754SKacheong.Poon@Sun.COM nmatch = 0;
29411754SKacheong.Poon@Sun.COM
29511754SKacheong.Poon@Sun.COM mutex_enter(&connfp->connf_lock);
29611754SKacheong.Poon@Sun.COM for (tconnp = connfp->connf_head; tconnp != NULL;
29711754SKacheong.Poon@Sun.COM tconnp = tconnp->conn_next) {
29811754SKacheong.Poon@Sun.COM tcp = tconnp->conn_tcp;
29911754SKacheong.Poon@Sun.COM /*
30011754SKacheong.Poon@Sun.COM * We are missing a check on sin6_scope_id for linklocals here,
30111754SKacheong.Poon@Sun.COM * but current usage is just for aborting based on zoneid
30211754SKacheong.Poon@Sun.COM * for shared-IP zones.
30311754SKacheong.Poon@Sun.COM */
30411754SKacheong.Poon@Sun.COM if (TCP_AC_MATCH(acp, tconnp, tcp)) {
30511754SKacheong.Poon@Sun.COM CONN_INC_REF(tconnp);
30611754SKacheong.Poon@Sun.COM mp = tcp_ioctl_abort_build_msg(acp, tcp);
30711754SKacheong.Poon@Sun.COM if (mp == NULL) {
30811754SKacheong.Poon@Sun.COM err = ENOMEM;
30911754SKacheong.Poon@Sun.COM CONN_DEC_REF(tconnp);
31011754SKacheong.Poon@Sun.COM break;
31111754SKacheong.Poon@Sun.COM }
31211754SKacheong.Poon@Sun.COM mp->b_prev = (mblk_t *)tcp;
31311754SKacheong.Poon@Sun.COM
31411754SKacheong.Poon@Sun.COM if (listhead == NULL) {
31511754SKacheong.Poon@Sun.COM listhead = mp;
31611754SKacheong.Poon@Sun.COM last = mp;
31711754SKacheong.Poon@Sun.COM } else {
31811754SKacheong.Poon@Sun.COM last->b_next = mp;
31911754SKacheong.Poon@Sun.COM last = mp;
32011754SKacheong.Poon@Sun.COM }
32111754SKacheong.Poon@Sun.COM nmatch++;
32211754SKacheong.Poon@Sun.COM if (exact)
32311754SKacheong.Poon@Sun.COM break;
32411754SKacheong.Poon@Sun.COM }
32511754SKacheong.Poon@Sun.COM
32611754SKacheong.Poon@Sun.COM /* Avoid holding lock for too long. */
32711754SKacheong.Poon@Sun.COM if (nmatch >= 500)
32811754SKacheong.Poon@Sun.COM break;
32911754SKacheong.Poon@Sun.COM }
33011754SKacheong.Poon@Sun.COM mutex_exit(&connfp->connf_lock);
33111754SKacheong.Poon@Sun.COM
33211754SKacheong.Poon@Sun.COM /* Pass mp into the correct tcp */
33311754SKacheong.Poon@Sun.COM while ((mp = listhead) != NULL) {
33411754SKacheong.Poon@Sun.COM listhead = listhead->b_next;
33511754SKacheong.Poon@Sun.COM tcp = (tcp_t *)mp->b_prev;
33611754SKacheong.Poon@Sun.COM mp->b_next = mp->b_prev = NULL;
33711754SKacheong.Poon@Sun.COM SQUEUE_ENTER_ONE(tcp->tcp_connp->conn_sqp, mp,
33811754SKacheong.Poon@Sun.COM tcp_ioctl_abort_handler, tcp->tcp_connp, NULL,
33911754SKacheong.Poon@Sun.COM SQ_FILL, SQTAG_TCP_ABORT_BUCKET);
34011754SKacheong.Poon@Sun.COM }
34111754SKacheong.Poon@Sun.COM
34211754SKacheong.Poon@Sun.COM *count += nmatch;
34311754SKacheong.Poon@Sun.COM if (nmatch >= 500 && err == 0)
34411754SKacheong.Poon@Sun.COM goto startover;
34511754SKacheong.Poon@Sun.COM return (err);
34611754SKacheong.Poon@Sun.COM }
34711754SKacheong.Poon@Sun.COM
34811754SKacheong.Poon@Sun.COM /*
34911754SKacheong.Poon@Sun.COM * Abort all connections that matches the attributes specified in acp.
35011754SKacheong.Poon@Sun.COM */
35111754SKacheong.Poon@Sun.COM static int
tcp_ioctl_abort(tcp_ioc_abort_conn_t * acp,tcp_stack_t * tcps)35211754SKacheong.Poon@Sun.COM tcp_ioctl_abort(tcp_ioc_abort_conn_t *acp, tcp_stack_t *tcps)
35311754SKacheong.Poon@Sun.COM {
35411754SKacheong.Poon@Sun.COM sa_family_t af;
35511754SKacheong.Poon@Sun.COM uint32_t ports;
35611754SKacheong.Poon@Sun.COM uint16_t *pports;
35711754SKacheong.Poon@Sun.COM int err = 0, count = 0;
35811754SKacheong.Poon@Sun.COM boolean_t exact = B_FALSE; /* set when there is no wildcard */
35911754SKacheong.Poon@Sun.COM int index = -1;
36011754SKacheong.Poon@Sun.COM ushort_t logflags;
36111754SKacheong.Poon@Sun.COM ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip;
36211754SKacheong.Poon@Sun.COM
36311754SKacheong.Poon@Sun.COM af = acp->ac_local.ss_family;
36411754SKacheong.Poon@Sun.COM
36511754SKacheong.Poon@Sun.COM if (af == AF_INET) {
36611754SKacheong.Poon@Sun.COM if (TCP_AC_V4REMOTE(acp) != INADDR_ANY &&
36711754SKacheong.Poon@Sun.COM TCP_AC_V4LPORT(acp) != 0 && TCP_AC_V4RPORT(acp) != 0) {
36811754SKacheong.Poon@Sun.COM pports = (uint16_t *)&ports;
36911754SKacheong.Poon@Sun.COM pports[1] = TCP_AC_V4LPORT(acp);
37011754SKacheong.Poon@Sun.COM pports[0] = TCP_AC_V4RPORT(acp);
37111754SKacheong.Poon@Sun.COM exact = (TCP_AC_V4LOCAL(acp) != INADDR_ANY);
37211754SKacheong.Poon@Sun.COM }
37311754SKacheong.Poon@Sun.COM } else {
37411754SKacheong.Poon@Sun.COM if (!IN6_IS_ADDR_UNSPECIFIED(&TCP_AC_V6REMOTE(acp)) &&
37511754SKacheong.Poon@Sun.COM TCP_AC_V6LPORT(acp) != 0 && TCP_AC_V6RPORT(acp) != 0) {
37611754SKacheong.Poon@Sun.COM pports = (uint16_t *)&ports;
37711754SKacheong.Poon@Sun.COM pports[1] = TCP_AC_V6LPORT(acp);
37811754SKacheong.Poon@Sun.COM pports[0] = TCP_AC_V6RPORT(acp);
37911754SKacheong.Poon@Sun.COM exact = !IN6_IS_ADDR_UNSPECIFIED(&TCP_AC_V6LOCAL(acp));
38011754SKacheong.Poon@Sun.COM }
38111754SKacheong.Poon@Sun.COM }
38211754SKacheong.Poon@Sun.COM
38311754SKacheong.Poon@Sun.COM /*
38411754SKacheong.Poon@Sun.COM * For cases where remote addr, local port, and remote port are non-
38511754SKacheong.Poon@Sun.COM * wildcards, tcp_ioctl_abort_bucket will only be called once.
38611754SKacheong.Poon@Sun.COM */
38711754SKacheong.Poon@Sun.COM if (index != -1) {
38811754SKacheong.Poon@Sun.COM err = tcp_ioctl_abort_bucket(acp, index,
38911754SKacheong.Poon@Sun.COM &count, exact, tcps);
39011754SKacheong.Poon@Sun.COM } else {
39111754SKacheong.Poon@Sun.COM /*
39211754SKacheong.Poon@Sun.COM * loop through all entries for wildcard case
39311754SKacheong.Poon@Sun.COM */
39411754SKacheong.Poon@Sun.COM for (index = 0;
39511754SKacheong.Poon@Sun.COM index < ipst->ips_ipcl_conn_fanout_size;
39611754SKacheong.Poon@Sun.COM index++) {
39711754SKacheong.Poon@Sun.COM err = tcp_ioctl_abort_bucket(acp, index,
39811754SKacheong.Poon@Sun.COM &count, exact, tcps);
39911754SKacheong.Poon@Sun.COM if (err != 0)
40011754SKacheong.Poon@Sun.COM break;
40111754SKacheong.Poon@Sun.COM }
40211754SKacheong.Poon@Sun.COM }
40311754SKacheong.Poon@Sun.COM
40411754SKacheong.Poon@Sun.COM logflags = SL_TRACE | SL_NOTE;
40511754SKacheong.Poon@Sun.COM /*
40611754SKacheong.Poon@Sun.COM * Don't print this message to the console if the operation was done
40711754SKacheong.Poon@Sun.COM * to a non-global zone.
40811754SKacheong.Poon@Sun.COM */
40911754SKacheong.Poon@Sun.COM if (acp->ac_zoneid == GLOBAL_ZONEID || acp->ac_zoneid == ALL_ZONES)
41011754SKacheong.Poon@Sun.COM logflags |= SL_CONSOLE;
41111754SKacheong.Poon@Sun.COM (void) strlog(TCP_MOD_ID, 0, 1, logflags, "TCP_IOC_ABORT_CONN: "
41211754SKacheong.Poon@Sun.COM "aborted %d connection%c\n", count, ((count > 1) ? 's' : ' '));
41311754SKacheong.Poon@Sun.COM if (err == 0 && count == 0)
41411754SKacheong.Poon@Sun.COM err = ENOENT;
41511754SKacheong.Poon@Sun.COM return (err);
41611754SKacheong.Poon@Sun.COM }
41711754SKacheong.Poon@Sun.COM
41811754SKacheong.Poon@Sun.COM /*
41911754SKacheong.Poon@Sun.COM * Process the TCP_IOC_ABORT_CONN ioctl request.
42011754SKacheong.Poon@Sun.COM */
42111754SKacheong.Poon@Sun.COM void
tcp_ioctl_abort_conn(queue_t * q,mblk_t * mp)42211754SKacheong.Poon@Sun.COM tcp_ioctl_abort_conn(queue_t *q, mblk_t *mp)
42311754SKacheong.Poon@Sun.COM {
42411754SKacheong.Poon@Sun.COM int err;
42511754SKacheong.Poon@Sun.COM IOCP iocp;
42611754SKacheong.Poon@Sun.COM MBLKP mp1;
42711754SKacheong.Poon@Sun.COM sa_family_t laf, raf;
42811754SKacheong.Poon@Sun.COM tcp_ioc_abort_conn_t *acp;
42911754SKacheong.Poon@Sun.COM zone_t *zptr;
43011754SKacheong.Poon@Sun.COM conn_t *connp = Q_TO_CONN(q);
43111754SKacheong.Poon@Sun.COM zoneid_t zoneid = connp->conn_zoneid;
43211754SKacheong.Poon@Sun.COM tcp_t *tcp = connp->conn_tcp;
43311754SKacheong.Poon@Sun.COM tcp_stack_t *tcps = tcp->tcp_tcps;
43411754SKacheong.Poon@Sun.COM
43511754SKacheong.Poon@Sun.COM iocp = (IOCP)mp->b_rptr;
43611754SKacheong.Poon@Sun.COM
43711754SKacheong.Poon@Sun.COM if ((mp1 = mp->b_cont) == NULL ||
43811754SKacheong.Poon@Sun.COM iocp->ioc_count != sizeof (tcp_ioc_abort_conn_t)) {
43911754SKacheong.Poon@Sun.COM err = EINVAL;
44011754SKacheong.Poon@Sun.COM goto out;
44111754SKacheong.Poon@Sun.COM }
44211754SKacheong.Poon@Sun.COM
44311754SKacheong.Poon@Sun.COM /* check permissions */
44411754SKacheong.Poon@Sun.COM if (secpolicy_ip_config(iocp->ioc_cr, B_FALSE) != 0) {
44511754SKacheong.Poon@Sun.COM err = EPERM;
44611754SKacheong.Poon@Sun.COM goto out;
44711754SKacheong.Poon@Sun.COM }
44811754SKacheong.Poon@Sun.COM
44911754SKacheong.Poon@Sun.COM if (mp1->b_cont != NULL) {
45011754SKacheong.Poon@Sun.COM freemsg(mp1->b_cont);
45111754SKacheong.Poon@Sun.COM mp1->b_cont = NULL;
45211754SKacheong.Poon@Sun.COM }
45311754SKacheong.Poon@Sun.COM
45411754SKacheong.Poon@Sun.COM acp = (tcp_ioc_abort_conn_t *)mp1->b_rptr;
45511754SKacheong.Poon@Sun.COM laf = acp->ac_local.ss_family;
45611754SKacheong.Poon@Sun.COM raf = acp->ac_remote.ss_family;
45711754SKacheong.Poon@Sun.COM
45811754SKacheong.Poon@Sun.COM /* check that a zone with the supplied zoneid exists */
45911754SKacheong.Poon@Sun.COM if (acp->ac_zoneid != GLOBAL_ZONEID && acp->ac_zoneid != ALL_ZONES) {
46011754SKacheong.Poon@Sun.COM zptr = zone_find_by_id(zoneid);
46111754SKacheong.Poon@Sun.COM if (zptr != NULL) {
46211754SKacheong.Poon@Sun.COM zone_rele(zptr);
46311754SKacheong.Poon@Sun.COM } else {
46411754SKacheong.Poon@Sun.COM err = EINVAL;
46511754SKacheong.Poon@Sun.COM goto out;
46611754SKacheong.Poon@Sun.COM }
46711754SKacheong.Poon@Sun.COM }
46811754SKacheong.Poon@Sun.COM
46911754SKacheong.Poon@Sun.COM /*
47011754SKacheong.Poon@Sun.COM * For exclusive stacks we set the zoneid to zero
47111754SKacheong.Poon@Sun.COM * to make TCP operate as if in the global zone.
47211754SKacheong.Poon@Sun.COM */
47311754SKacheong.Poon@Sun.COM if (tcps->tcps_netstack->netstack_stackid != GLOBAL_NETSTACKID)
47411754SKacheong.Poon@Sun.COM acp->ac_zoneid = GLOBAL_ZONEID;
47511754SKacheong.Poon@Sun.COM
47611754SKacheong.Poon@Sun.COM if (acp->ac_start < TCPS_SYN_SENT || acp->ac_end > TCPS_TIME_WAIT ||
47711754SKacheong.Poon@Sun.COM acp->ac_start > acp->ac_end || laf != raf ||
47811754SKacheong.Poon@Sun.COM (laf != AF_INET && laf != AF_INET6)) {
47911754SKacheong.Poon@Sun.COM err = EINVAL;
48011754SKacheong.Poon@Sun.COM goto out;
48111754SKacheong.Poon@Sun.COM }
48211754SKacheong.Poon@Sun.COM
48311754SKacheong.Poon@Sun.COM tcp_ioctl_abort_dump(acp);
48411754SKacheong.Poon@Sun.COM err = tcp_ioctl_abort(acp, tcps);
48511754SKacheong.Poon@Sun.COM
48611754SKacheong.Poon@Sun.COM out:
48711754SKacheong.Poon@Sun.COM if (mp1 != NULL) {
48811754SKacheong.Poon@Sun.COM freemsg(mp1);
48911754SKacheong.Poon@Sun.COM mp->b_cont = NULL;
49011754SKacheong.Poon@Sun.COM }
49111754SKacheong.Poon@Sun.COM
49211754SKacheong.Poon@Sun.COM if (err != 0)
49311754SKacheong.Poon@Sun.COM miocnak(q, mp, 0, err);
49411754SKacheong.Poon@Sun.COM else
49511754SKacheong.Poon@Sun.COM miocack(q, mp, 0, 0);
49611754SKacheong.Poon@Sun.COM }
49711754SKacheong.Poon@Sun.COM
49811754SKacheong.Poon@Sun.COM /*
49911754SKacheong.Poon@Sun.COM * Timeout function to reset the TCP stack variable tcps_reclaim to false.
50011754SKacheong.Poon@Sun.COM */
50111754SKacheong.Poon@Sun.COM void
tcp_reclaim_timer(void * arg)50211754SKacheong.Poon@Sun.COM tcp_reclaim_timer(void *arg)
50311754SKacheong.Poon@Sun.COM {
50411754SKacheong.Poon@Sun.COM tcp_stack_t *tcps = (tcp_stack_t *)arg;
50511754SKacheong.Poon@Sun.COM int64_t tot_conn = 0;
50611754SKacheong.Poon@Sun.COM int i;
50711754SKacheong.Poon@Sun.COM extern pgcnt_t lotsfree, needfree;
50811754SKacheong.Poon@Sun.COM
50911754SKacheong.Poon@Sun.COM for (i = 0; i < tcps->tcps_sc_cnt; i++)
51011754SKacheong.Poon@Sun.COM tot_conn += tcps->tcps_sc[i]->tcp_sc_conn_cnt;
51111754SKacheong.Poon@Sun.COM
51211754SKacheong.Poon@Sun.COM /*
51311754SKacheong.Poon@Sun.COM * This happens only when a stack is going away. tcps_reclaim_tid
51411754SKacheong.Poon@Sun.COM * should not be reset to 0 when returning in this case.
51511754SKacheong.Poon@Sun.COM */
51611754SKacheong.Poon@Sun.COM mutex_enter(&tcps->tcps_reclaim_lock);
51711754SKacheong.Poon@Sun.COM if (!tcps->tcps_reclaim) {
51811754SKacheong.Poon@Sun.COM mutex_exit(&tcps->tcps_reclaim_lock);
51911754SKacheong.Poon@Sun.COM return;
52011754SKacheong.Poon@Sun.COM }
52111754SKacheong.Poon@Sun.COM
52211754SKacheong.Poon@Sun.COM if ((freemem >= lotsfree + needfree) || tot_conn < maxusers) {
52311754SKacheong.Poon@Sun.COM tcps->tcps_reclaim = B_FALSE;
52411754SKacheong.Poon@Sun.COM tcps->tcps_reclaim_tid = 0;
52511754SKacheong.Poon@Sun.COM } else {
52611754SKacheong.Poon@Sun.COM /* Stay in defensive mode and restart the timer */
52711754SKacheong.Poon@Sun.COM tcps->tcps_reclaim_tid = timeout(tcp_reclaim_timer,
52811754SKacheong.Poon@Sun.COM tcps, MSEC_TO_TICK(tcps->tcps_reclaim_period));
52911754SKacheong.Poon@Sun.COM }
53011754SKacheong.Poon@Sun.COM mutex_exit(&tcps->tcps_reclaim_lock);
53111754SKacheong.Poon@Sun.COM }
53211754SKacheong.Poon@Sun.COM
53311754SKacheong.Poon@Sun.COM /*
53411754SKacheong.Poon@Sun.COM * Kmem reclaim call back function. When the system is under memory
53511754SKacheong.Poon@Sun.COM * pressure, we set the TCP stack variable tcps_reclaim to true. This
53611754SKacheong.Poon@Sun.COM * variable is reset to false after tcps_reclaim_period msecs. During this
53711754SKacheong.Poon@Sun.COM * period, TCP will be more aggressive in aborting connections not making
53811754SKacheong.Poon@Sun.COM * progress, meaning retransmitting for some time (tcp_early_abort seconds).
53911754SKacheong.Poon@Sun.COM * TCP will also not accept new connection request for those listeners whose
54011754SKacheong.Poon@Sun.COM * q or q0 is not empty.
54111754SKacheong.Poon@Sun.COM */
54211754SKacheong.Poon@Sun.COM /* ARGSUSED */
54311754SKacheong.Poon@Sun.COM void
tcp_conn_reclaim(void * arg)54411754SKacheong.Poon@Sun.COM tcp_conn_reclaim(void *arg)
54511754SKacheong.Poon@Sun.COM {
54611754SKacheong.Poon@Sun.COM netstack_handle_t nh;
54711754SKacheong.Poon@Sun.COM netstack_t *ns;
54811754SKacheong.Poon@Sun.COM tcp_stack_t *tcps;
54911754SKacheong.Poon@Sun.COM extern pgcnt_t lotsfree, needfree;
55011754SKacheong.Poon@Sun.COM
55111754SKacheong.Poon@Sun.COM if (!tcp_do_reclaim)
55211754SKacheong.Poon@Sun.COM return;
55311754SKacheong.Poon@Sun.COM
55411754SKacheong.Poon@Sun.COM /*
55511754SKacheong.Poon@Sun.COM * The reclaim function may be called even when the system is not
55611754SKacheong.Poon@Sun.COM * really under memory pressure.
55711754SKacheong.Poon@Sun.COM */
55811754SKacheong.Poon@Sun.COM if (freemem >= lotsfree + needfree)
55911754SKacheong.Poon@Sun.COM return;
56011754SKacheong.Poon@Sun.COM
56111754SKacheong.Poon@Sun.COM netstack_next_init(&nh);
56211754SKacheong.Poon@Sun.COM while ((ns = netstack_next(&nh)) != NULL) {
56311754SKacheong.Poon@Sun.COM int i;
56411754SKacheong.Poon@Sun.COM int64_t tot_conn = 0;
56511754SKacheong.Poon@Sun.COM
56611769SKacheong.Poon@Sun.COM /*
56711769SKacheong.Poon@Sun.COM * During boot time, the first netstack_t is created and
56811769SKacheong.Poon@Sun.COM * initialized before TCP has registered with the netstack
56911769SKacheong.Poon@Sun.COM * framework. If this reclaim function is called before TCP
57011769SKacheong.Poon@Sun.COM * has finished its initialization, netstack_next() will
57111769SKacheong.Poon@Sun.COM * return the first netstack_t (since its netstack_flags is
57211769SKacheong.Poon@Sun.COM * not NSF_UNINIT). And its netstack_tcp will be NULL. We
57311769SKacheong.Poon@Sun.COM * need to catch it.
57411769SKacheong.Poon@Sun.COM *
57511769SKacheong.Poon@Sun.COM * All subsequent netstack_t creation will not have this
57611769SKacheong.Poon@Sun.COM * problem since the initialization is not finished until TCP
57711769SKacheong.Poon@Sun.COM * has finished its own tcp_stack_t initialization. Hence
57811769SKacheong.Poon@Sun.COM * netstack_next() will not return one with NULL netstack_tcp.
57911769SKacheong.Poon@Sun.COM */
58011769SKacheong.Poon@Sun.COM if ((tcps = ns->netstack_tcp) == NULL) {
58111769SKacheong.Poon@Sun.COM netstack_rele(ns);
58211769SKacheong.Poon@Sun.COM continue;
58311769SKacheong.Poon@Sun.COM }
58411754SKacheong.Poon@Sun.COM
58511754SKacheong.Poon@Sun.COM /*
58611754SKacheong.Poon@Sun.COM * Even if the system is under memory pressure, the reason may
58711754SKacheong.Poon@Sun.COM * not be because of TCP activity. Check the number of
58811754SKacheong.Poon@Sun.COM * connections in each stack. If the number exceeds the
58911754SKacheong.Poon@Sun.COM * threshold (maxusers), turn on defensive mode.
59011754SKacheong.Poon@Sun.COM */
59111754SKacheong.Poon@Sun.COM for (i = 0; i < tcps->tcps_sc_cnt; i++)
59211754SKacheong.Poon@Sun.COM tot_conn += tcps->tcps_sc[i]->tcp_sc_conn_cnt;
59311754SKacheong.Poon@Sun.COM if (tot_conn < maxusers) {
59411754SKacheong.Poon@Sun.COM netstack_rele(ns);
59511754SKacheong.Poon@Sun.COM continue;
59611754SKacheong.Poon@Sun.COM }
59711754SKacheong.Poon@Sun.COM
59811754SKacheong.Poon@Sun.COM mutex_enter(&tcps->tcps_reclaim_lock);
59911754SKacheong.Poon@Sun.COM if (!tcps->tcps_reclaim) {
60011754SKacheong.Poon@Sun.COM tcps->tcps_reclaim = B_TRUE;
60111754SKacheong.Poon@Sun.COM tcps->tcps_reclaim_tid = timeout(tcp_reclaim_timer,
60211754SKacheong.Poon@Sun.COM tcps, MSEC_TO_TICK(tcps->tcps_reclaim_period));
60311754SKacheong.Poon@Sun.COM TCP_STAT(tcps, tcp_reclaim_cnt);
60411754SKacheong.Poon@Sun.COM }
60511754SKacheong.Poon@Sun.COM mutex_exit(&tcps->tcps_reclaim_lock);
60611754SKacheong.Poon@Sun.COM netstack_rele(ns);
60711754SKacheong.Poon@Sun.COM }
60811754SKacheong.Poon@Sun.COM netstack_next_fini(&nh);
60911754SKacheong.Poon@Sun.COM }
61011754SKacheong.Poon@Sun.COM
61111754SKacheong.Poon@Sun.COM /*
61211754SKacheong.Poon@Sun.COM * Given a tcp_stack_t and a port (in host byte order), find a listener
61311754SKacheong.Poon@Sun.COM * configuration for that port and return the ratio.
61411754SKacheong.Poon@Sun.COM */
61511754SKacheong.Poon@Sun.COM uint32_t
tcp_find_listener_conf(tcp_stack_t * tcps,in_port_t port)61611754SKacheong.Poon@Sun.COM tcp_find_listener_conf(tcp_stack_t *tcps, in_port_t port)
61711754SKacheong.Poon@Sun.COM {
61811754SKacheong.Poon@Sun.COM tcp_listener_t *tl;
61911754SKacheong.Poon@Sun.COM uint32_t ratio = 0;
62011754SKacheong.Poon@Sun.COM
62111754SKacheong.Poon@Sun.COM mutex_enter(&tcps->tcps_listener_conf_lock);
62211754SKacheong.Poon@Sun.COM for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
62311754SKacheong.Poon@Sun.COM tl = list_next(&tcps->tcps_listener_conf, tl)) {
62411754SKacheong.Poon@Sun.COM if (tl->tl_port == port) {
62511754SKacheong.Poon@Sun.COM ratio = tl->tl_ratio;
62611754SKacheong.Poon@Sun.COM break;
62711754SKacheong.Poon@Sun.COM }
62811754SKacheong.Poon@Sun.COM }
62911754SKacheong.Poon@Sun.COM mutex_exit(&tcps->tcps_listener_conf_lock);
63011754SKacheong.Poon@Sun.COM return (ratio);
63111754SKacheong.Poon@Sun.COM }
63211754SKacheong.Poon@Sun.COM
63311754SKacheong.Poon@Sun.COM /*
63411754SKacheong.Poon@Sun.COM * To remove all listener limit configuration in a tcp_stack_t.
63511754SKacheong.Poon@Sun.COM */
63611754SKacheong.Poon@Sun.COM void
tcp_listener_conf_cleanup(tcp_stack_t * tcps)63711754SKacheong.Poon@Sun.COM tcp_listener_conf_cleanup(tcp_stack_t *tcps)
63811754SKacheong.Poon@Sun.COM {
63911754SKacheong.Poon@Sun.COM tcp_listener_t *tl;
64011754SKacheong.Poon@Sun.COM
64111754SKacheong.Poon@Sun.COM mutex_enter(&tcps->tcps_listener_conf_lock);
64211754SKacheong.Poon@Sun.COM while ((tl = list_head(&tcps->tcps_listener_conf)) != NULL) {
64311754SKacheong.Poon@Sun.COM list_remove(&tcps->tcps_listener_conf, tl);
64411754SKacheong.Poon@Sun.COM kmem_free(tl, sizeof (tcp_listener_t));
64511754SKacheong.Poon@Sun.COM }
64611754SKacheong.Poon@Sun.COM mutex_destroy(&tcps->tcps_listener_conf_lock);
64711754SKacheong.Poon@Sun.COM list_destroy(&tcps->tcps_listener_conf);
64811754SKacheong.Poon@Sun.COM }
64911754SKacheong.Poon@Sun.COM
65011754SKacheong.Poon@Sun.COM /*
651*12869SKacheong.Poon@Sun.COM * When a CPU is added, we need to allocate the per CPU stats struct.
65211754SKacheong.Poon@Sun.COM */
653*12869SKacheong.Poon@Sun.COM void
tcp_stack_cpu_add(tcp_stack_t * tcps,processorid_t cpu_seqid)654*12869SKacheong.Poon@Sun.COM tcp_stack_cpu_add(tcp_stack_t *tcps, processorid_t cpu_seqid)
65511754SKacheong.Poon@Sun.COM {
65611754SKacheong.Poon@Sun.COM int i;
65711754SKacheong.Poon@Sun.COM
658*12869SKacheong.Poon@Sun.COM if (cpu_seqid < tcps->tcps_sc_cnt)
659*12869SKacheong.Poon@Sun.COM return;
660*12869SKacheong.Poon@Sun.COM for (i = tcps->tcps_sc_cnt; i <= cpu_seqid; i++) {
661*12869SKacheong.Poon@Sun.COM ASSERT(tcps->tcps_sc[i] == NULL);
662*12869SKacheong.Poon@Sun.COM tcps->tcps_sc[i] = kmem_zalloc(sizeof (tcp_stats_cpu_t),
663*12869SKacheong.Poon@Sun.COM KM_SLEEP);
66411754SKacheong.Poon@Sun.COM }
665*12869SKacheong.Poon@Sun.COM membar_producer();
666*12869SKacheong.Poon@Sun.COM tcps->tcps_sc_cnt = cpu_seqid + 1;
66711754SKacheong.Poon@Sun.COM }
66811754SKacheong.Poon@Sun.COM
66911754SKacheong.Poon@Sun.COM /*
67011754SKacheong.Poon@Sun.COM * Diagnostic routine used to return a string associated with the tcp state.
67111754SKacheong.Poon@Sun.COM * Note that if the caller does not supply a buffer, it will use an internal
67211754SKacheong.Poon@Sun.COM * static string. This means that if multiple threads call this function at
67311754SKacheong.Poon@Sun.COM * the same time, output can be corrupted... Note also that this function
67411754SKacheong.Poon@Sun.COM * does not check the size of the supplied buffer. The caller has to make
67511754SKacheong.Poon@Sun.COM * sure that it is big enough.
67611754SKacheong.Poon@Sun.COM */
67711754SKacheong.Poon@Sun.COM char *
tcp_display(tcp_t * tcp,char * sup_buf,char format)67811754SKacheong.Poon@Sun.COM tcp_display(tcp_t *tcp, char *sup_buf, char format)
67911754SKacheong.Poon@Sun.COM {
68011754SKacheong.Poon@Sun.COM char buf1[30];
68111754SKacheong.Poon@Sun.COM static char priv_buf[INET6_ADDRSTRLEN * 2 + 80];
68211754SKacheong.Poon@Sun.COM char *buf;
68311754SKacheong.Poon@Sun.COM char *cp;
68411754SKacheong.Poon@Sun.COM in6_addr_t local, remote;
68511754SKacheong.Poon@Sun.COM char local_addrbuf[INET6_ADDRSTRLEN];
68611754SKacheong.Poon@Sun.COM char remote_addrbuf[INET6_ADDRSTRLEN];
68711754SKacheong.Poon@Sun.COM conn_t *connp;
68811754SKacheong.Poon@Sun.COM
68911754SKacheong.Poon@Sun.COM if (sup_buf != NULL)
69011754SKacheong.Poon@Sun.COM buf = sup_buf;
69111754SKacheong.Poon@Sun.COM else
69211754SKacheong.Poon@Sun.COM buf = priv_buf;
69311754SKacheong.Poon@Sun.COM
69411754SKacheong.Poon@Sun.COM if (tcp == NULL)
69511754SKacheong.Poon@Sun.COM return ("NULL_TCP");
69611754SKacheong.Poon@Sun.COM
69711754SKacheong.Poon@Sun.COM connp = tcp->tcp_connp;
69811754SKacheong.Poon@Sun.COM switch (tcp->tcp_state) {
69911754SKacheong.Poon@Sun.COM case TCPS_CLOSED:
70011754SKacheong.Poon@Sun.COM cp = "TCP_CLOSED";
70111754SKacheong.Poon@Sun.COM break;
70211754SKacheong.Poon@Sun.COM case TCPS_IDLE:
70311754SKacheong.Poon@Sun.COM cp = "TCP_IDLE";
70411754SKacheong.Poon@Sun.COM break;
70511754SKacheong.Poon@Sun.COM case TCPS_BOUND:
70611754SKacheong.Poon@Sun.COM cp = "TCP_BOUND";
70711754SKacheong.Poon@Sun.COM break;
70811754SKacheong.Poon@Sun.COM case TCPS_LISTEN:
70911754SKacheong.Poon@Sun.COM cp = "TCP_LISTEN";
71011754SKacheong.Poon@Sun.COM break;
71111754SKacheong.Poon@Sun.COM case TCPS_SYN_SENT:
71211754SKacheong.Poon@Sun.COM cp = "TCP_SYN_SENT";
71311754SKacheong.Poon@Sun.COM break;
71411754SKacheong.Poon@Sun.COM case TCPS_SYN_RCVD:
71511754SKacheong.Poon@Sun.COM cp = "TCP_SYN_RCVD";
71611754SKacheong.Poon@Sun.COM break;
71711754SKacheong.Poon@Sun.COM case TCPS_ESTABLISHED:
71811754SKacheong.Poon@Sun.COM cp = "TCP_ESTABLISHED";
71911754SKacheong.Poon@Sun.COM break;
72011754SKacheong.Poon@Sun.COM case TCPS_CLOSE_WAIT:
72111754SKacheong.Poon@Sun.COM cp = "TCP_CLOSE_WAIT";
72211754SKacheong.Poon@Sun.COM break;
72311754SKacheong.Poon@Sun.COM case TCPS_FIN_WAIT_1:
72411754SKacheong.Poon@Sun.COM cp = "TCP_FIN_WAIT_1";
72511754SKacheong.Poon@Sun.COM break;
72611754SKacheong.Poon@Sun.COM case TCPS_CLOSING:
72711754SKacheong.Poon@Sun.COM cp = "TCP_CLOSING";
72811754SKacheong.Poon@Sun.COM break;
72911754SKacheong.Poon@Sun.COM case TCPS_LAST_ACK:
73011754SKacheong.Poon@Sun.COM cp = "TCP_LAST_ACK";
73111754SKacheong.Poon@Sun.COM break;
73211754SKacheong.Poon@Sun.COM case TCPS_FIN_WAIT_2:
73311754SKacheong.Poon@Sun.COM cp = "TCP_FIN_WAIT_2";
73411754SKacheong.Poon@Sun.COM break;
73511754SKacheong.Poon@Sun.COM case TCPS_TIME_WAIT:
73611754SKacheong.Poon@Sun.COM cp = "TCP_TIME_WAIT";
73711754SKacheong.Poon@Sun.COM break;
73811754SKacheong.Poon@Sun.COM default:
73911754SKacheong.Poon@Sun.COM (void) mi_sprintf(buf1, "TCPUnkState(%d)", tcp->tcp_state);
74011754SKacheong.Poon@Sun.COM cp = buf1;
74111754SKacheong.Poon@Sun.COM break;
74211754SKacheong.Poon@Sun.COM }
74311754SKacheong.Poon@Sun.COM switch (format) {
74411754SKacheong.Poon@Sun.COM case DISP_ADDR_AND_PORT:
74511754SKacheong.Poon@Sun.COM if (connp->conn_ipversion == IPV4_VERSION) {
74611754SKacheong.Poon@Sun.COM /*
74711754SKacheong.Poon@Sun.COM * Note that we use the remote address in the tcp_b
74811754SKacheong.Poon@Sun.COM * structure. This means that it will print out
74911754SKacheong.Poon@Sun.COM * the real destination address, not the next hop's
75011754SKacheong.Poon@Sun.COM * address if source routing is used.
75111754SKacheong.Poon@Sun.COM */
75211754SKacheong.Poon@Sun.COM IN6_IPADDR_TO_V4MAPPED(connp->conn_laddr_v4, &local);
75311754SKacheong.Poon@Sun.COM IN6_IPADDR_TO_V4MAPPED(connp->conn_faddr_v4, &remote);
75411754SKacheong.Poon@Sun.COM
75511754SKacheong.Poon@Sun.COM } else {
75611754SKacheong.Poon@Sun.COM local = connp->conn_laddr_v6;
75711754SKacheong.Poon@Sun.COM remote = connp->conn_faddr_v6;
75811754SKacheong.Poon@Sun.COM }
75911754SKacheong.Poon@Sun.COM (void) inet_ntop(AF_INET6, &local, local_addrbuf,
76011754SKacheong.Poon@Sun.COM sizeof (local_addrbuf));
76111754SKacheong.Poon@Sun.COM (void) inet_ntop(AF_INET6, &remote, remote_addrbuf,
76211754SKacheong.Poon@Sun.COM sizeof (remote_addrbuf));
76311754SKacheong.Poon@Sun.COM (void) mi_sprintf(buf, "[%s.%u, %s.%u] %s",
76411754SKacheong.Poon@Sun.COM local_addrbuf, ntohs(connp->conn_lport), remote_addrbuf,
76511754SKacheong.Poon@Sun.COM ntohs(connp->conn_fport), cp);
76611754SKacheong.Poon@Sun.COM break;
76711754SKacheong.Poon@Sun.COM case DISP_PORT_ONLY:
76811754SKacheong.Poon@Sun.COM default:
76911754SKacheong.Poon@Sun.COM (void) mi_sprintf(buf, "[%u, %u] %s",
77011754SKacheong.Poon@Sun.COM ntohs(connp->conn_lport), ntohs(connp->conn_fport), cp);
77111754SKacheong.Poon@Sun.COM break;
77211754SKacheong.Poon@Sun.COM }
77311754SKacheong.Poon@Sun.COM
77411754SKacheong.Poon@Sun.COM return (buf);
77511754SKacheong.Poon@Sun.COM }
776