1*12869SKacheong.Poon@Sun.COM /*
2*12869SKacheong.Poon@Sun.COM * CDDL HEADER START
3*12869SKacheong.Poon@Sun.COM *
4*12869SKacheong.Poon@Sun.COM * The contents of this file are subject to the terms of the
5*12869SKacheong.Poon@Sun.COM * Common Development and Distribution License (the "License").
6*12869SKacheong.Poon@Sun.COM * You may not use this file except in compliance with the License.
7*12869SKacheong.Poon@Sun.COM *
8*12869SKacheong.Poon@Sun.COM * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*12869SKacheong.Poon@Sun.COM * or http://www.opensolaris.org/os/licensing.
10*12869SKacheong.Poon@Sun.COM * See the License for the specific language governing permissions
11*12869SKacheong.Poon@Sun.COM * and limitations under the License.
12*12869SKacheong.Poon@Sun.COM *
13*12869SKacheong.Poon@Sun.COM * When distributing Covered Code, include this CDDL HEADER in each
14*12869SKacheong.Poon@Sun.COM * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*12869SKacheong.Poon@Sun.COM * If applicable, add the following below this CDDL HEADER, with the
16*12869SKacheong.Poon@Sun.COM * fields enclosed by brackets "[]" replaced with your own identifying
17*12869SKacheong.Poon@Sun.COM * information: Portions Copyright [yyyy] [name of copyright owner]
18*12869SKacheong.Poon@Sun.COM *
19*12869SKacheong.Poon@Sun.COM * CDDL HEADER END
20*12869SKacheong.Poon@Sun.COM */
21*12869SKacheong.Poon@Sun.COM
22*12869SKacheong.Poon@Sun.COM /*
23*12869SKacheong.Poon@Sun.COM * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24*12869SKacheong.Poon@Sun.COM */
25*12869SKacheong.Poon@Sun.COM
26*12869SKacheong.Poon@Sun.COM #include <sys/types.h>
27*12869SKacheong.Poon@Sun.COM #include <inet/common.h>
28*12869SKacheong.Poon@Sun.COM #include "sctp_impl.h"
29*12869SKacheong.Poon@Sun.COM
30*12869SKacheong.Poon@Sun.COM /* Control whether SCTP can enter defensive mode when under memory pressure. */
31*12869SKacheong.Poon@Sun.COM static boolean_t sctp_do_reclaim = B_TRUE;
32*12869SKacheong.Poon@Sun.COM
33*12869SKacheong.Poon@Sun.COM static void sctp_reclaim_timer(void *);
34*12869SKacheong.Poon@Sun.COM
35*12869SKacheong.Poon@Sun.COM /* Diagnostic routine used to return a string associated with the sctp state. */
36*12869SKacheong.Poon@Sun.COM char *
sctp_display(sctp_t * sctp,char * sup_buf)37*12869SKacheong.Poon@Sun.COM sctp_display(sctp_t *sctp, char *sup_buf)
38*12869SKacheong.Poon@Sun.COM {
39*12869SKacheong.Poon@Sun.COM char *buf;
40*12869SKacheong.Poon@Sun.COM char buf1[30];
41*12869SKacheong.Poon@Sun.COM static char priv_buf[INET6_ADDRSTRLEN * 2 + 80];
42*12869SKacheong.Poon@Sun.COM char *cp;
43*12869SKacheong.Poon@Sun.COM conn_t *connp;
44*12869SKacheong.Poon@Sun.COM
45*12869SKacheong.Poon@Sun.COM if (sctp == NULL)
46*12869SKacheong.Poon@Sun.COM return ("NULL_SCTP");
47*12869SKacheong.Poon@Sun.COM
48*12869SKacheong.Poon@Sun.COM connp = sctp->sctp_connp;
49*12869SKacheong.Poon@Sun.COM buf = (sup_buf != NULL) ? sup_buf : priv_buf;
50*12869SKacheong.Poon@Sun.COM
51*12869SKacheong.Poon@Sun.COM switch (sctp->sctp_state) {
52*12869SKacheong.Poon@Sun.COM case SCTPS_IDLE:
53*12869SKacheong.Poon@Sun.COM cp = "SCTP_IDLE";
54*12869SKacheong.Poon@Sun.COM break;
55*12869SKacheong.Poon@Sun.COM case SCTPS_BOUND:
56*12869SKacheong.Poon@Sun.COM cp = "SCTP_BOUND";
57*12869SKacheong.Poon@Sun.COM break;
58*12869SKacheong.Poon@Sun.COM case SCTPS_LISTEN:
59*12869SKacheong.Poon@Sun.COM cp = "SCTP_LISTEN";
60*12869SKacheong.Poon@Sun.COM break;
61*12869SKacheong.Poon@Sun.COM case SCTPS_COOKIE_WAIT:
62*12869SKacheong.Poon@Sun.COM cp = "SCTP_COOKIE_WAIT";
63*12869SKacheong.Poon@Sun.COM break;
64*12869SKacheong.Poon@Sun.COM case SCTPS_COOKIE_ECHOED:
65*12869SKacheong.Poon@Sun.COM cp = "SCTP_COOKIE_ECHOED";
66*12869SKacheong.Poon@Sun.COM break;
67*12869SKacheong.Poon@Sun.COM case SCTPS_ESTABLISHED:
68*12869SKacheong.Poon@Sun.COM cp = "SCTP_ESTABLISHED";
69*12869SKacheong.Poon@Sun.COM break;
70*12869SKacheong.Poon@Sun.COM case SCTPS_SHUTDOWN_PENDING:
71*12869SKacheong.Poon@Sun.COM cp = "SCTP_SHUTDOWN_PENDING";
72*12869SKacheong.Poon@Sun.COM break;
73*12869SKacheong.Poon@Sun.COM case SCTPS_SHUTDOWN_SENT:
74*12869SKacheong.Poon@Sun.COM cp = "SCTPS_SHUTDOWN_SENT";
75*12869SKacheong.Poon@Sun.COM break;
76*12869SKacheong.Poon@Sun.COM case SCTPS_SHUTDOWN_RECEIVED:
77*12869SKacheong.Poon@Sun.COM cp = "SCTPS_SHUTDOWN_RECEIVED";
78*12869SKacheong.Poon@Sun.COM break;
79*12869SKacheong.Poon@Sun.COM case SCTPS_SHUTDOWN_ACK_SENT:
80*12869SKacheong.Poon@Sun.COM cp = "SCTPS_SHUTDOWN_ACK_SENT";
81*12869SKacheong.Poon@Sun.COM break;
82*12869SKacheong.Poon@Sun.COM default:
83*12869SKacheong.Poon@Sun.COM (void) mi_sprintf(buf1, "SCTPUnkState(%d)", sctp->sctp_state);
84*12869SKacheong.Poon@Sun.COM cp = buf1;
85*12869SKacheong.Poon@Sun.COM break;
86*12869SKacheong.Poon@Sun.COM }
87*12869SKacheong.Poon@Sun.COM (void) mi_sprintf(buf, "[%u, %u] %s",
88*12869SKacheong.Poon@Sun.COM ntohs(connp->conn_lport), ntohs(connp->conn_fport), cp);
89*12869SKacheong.Poon@Sun.COM
90*12869SKacheong.Poon@Sun.COM return (buf);
91*12869SKacheong.Poon@Sun.COM }
92*12869SKacheong.Poon@Sun.COM
93*12869SKacheong.Poon@Sun.COM void
sctp_display_all(sctp_stack_t * sctps)94*12869SKacheong.Poon@Sun.COM sctp_display_all(sctp_stack_t *sctps)
95*12869SKacheong.Poon@Sun.COM {
96*12869SKacheong.Poon@Sun.COM sctp_t *sctp_walker;
97*12869SKacheong.Poon@Sun.COM
98*12869SKacheong.Poon@Sun.COM mutex_enter(&sctps->sctps_g_lock);
99*12869SKacheong.Poon@Sun.COM for (sctp_walker = list_head(&sctps->sctps_g_list);
100*12869SKacheong.Poon@Sun.COM sctp_walker != NULL;
101*12869SKacheong.Poon@Sun.COM sctp_walker = (sctp_t *)list_next(&sctps->sctps_g_list,
102*12869SKacheong.Poon@Sun.COM sctp_walker)) {
103*12869SKacheong.Poon@Sun.COM (void) sctp_display(sctp_walker, NULL);
104*12869SKacheong.Poon@Sun.COM }
105*12869SKacheong.Poon@Sun.COM mutex_exit(&sctps->sctps_g_lock);
106*12869SKacheong.Poon@Sun.COM }
107*12869SKacheong.Poon@Sun.COM
108*12869SKacheong.Poon@Sun.COM /*
109*12869SKacheong.Poon@Sun.COM * Given a sctp_stack_t and a port (in host byte order), find a listener
110*12869SKacheong.Poon@Sun.COM * configuration for that port and return the ratio.
111*12869SKacheong.Poon@Sun.COM */
112*12869SKacheong.Poon@Sun.COM uint32_t
sctp_find_listener_conf(sctp_stack_t * sctps,in_port_t port)113*12869SKacheong.Poon@Sun.COM sctp_find_listener_conf(sctp_stack_t *sctps, in_port_t port)
114*12869SKacheong.Poon@Sun.COM {
115*12869SKacheong.Poon@Sun.COM sctp_listener_t *sl;
116*12869SKacheong.Poon@Sun.COM uint32_t ratio = 0;
117*12869SKacheong.Poon@Sun.COM
118*12869SKacheong.Poon@Sun.COM mutex_enter(&sctps->sctps_listener_conf_lock);
119*12869SKacheong.Poon@Sun.COM for (sl = list_head(&sctps->sctps_listener_conf); sl != NULL;
120*12869SKacheong.Poon@Sun.COM sl = list_next(&sctps->sctps_listener_conf, sl)) {
121*12869SKacheong.Poon@Sun.COM if (sl->sl_port == port) {
122*12869SKacheong.Poon@Sun.COM ratio = sl->sl_ratio;
123*12869SKacheong.Poon@Sun.COM break;
124*12869SKacheong.Poon@Sun.COM }
125*12869SKacheong.Poon@Sun.COM }
126*12869SKacheong.Poon@Sun.COM mutex_exit(&sctps->sctps_listener_conf_lock);
127*12869SKacheong.Poon@Sun.COM return (ratio);
128*12869SKacheong.Poon@Sun.COM }
129*12869SKacheong.Poon@Sun.COM
130*12869SKacheong.Poon@Sun.COM /*
131*12869SKacheong.Poon@Sun.COM * To remove all listener limit configuration in a sctp_stack_t.
132*12869SKacheong.Poon@Sun.COM */
133*12869SKacheong.Poon@Sun.COM void
sctp_listener_conf_cleanup(sctp_stack_t * sctps)134*12869SKacheong.Poon@Sun.COM sctp_listener_conf_cleanup(sctp_stack_t *sctps)
135*12869SKacheong.Poon@Sun.COM {
136*12869SKacheong.Poon@Sun.COM sctp_listener_t *sl;
137*12869SKacheong.Poon@Sun.COM
138*12869SKacheong.Poon@Sun.COM mutex_enter(&sctps->sctps_listener_conf_lock);
139*12869SKacheong.Poon@Sun.COM while ((sl = list_head(&sctps->sctps_listener_conf)) != NULL) {
140*12869SKacheong.Poon@Sun.COM list_remove(&sctps->sctps_listener_conf, sl);
141*12869SKacheong.Poon@Sun.COM kmem_free(sl, sizeof (sctp_listener_t));
142*12869SKacheong.Poon@Sun.COM }
143*12869SKacheong.Poon@Sun.COM mutex_destroy(&sctps->sctps_listener_conf_lock);
144*12869SKacheong.Poon@Sun.COM list_destroy(&sctps->sctps_listener_conf);
145*12869SKacheong.Poon@Sun.COM }
146*12869SKacheong.Poon@Sun.COM
147*12869SKacheong.Poon@Sun.COM
148*12869SKacheong.Poon@Sun.COM /*
149*12869SKacheong.Poon@Sun.COM * Timeout function to reset the SCTP stack variable sctps_reclaim to false.
150*12869SKacheong.Poon@Sun.COM */
151*12869SKacheong.Poon@Sun.COM static void
sctp_reclaim_timer(void * arg)152*12869SKacheong.Poon@Sun.COM sctp_reclaim_timer(void *arg)
153*12869SKacheong.Poon@Sun.COM {
154*12869SKacheong.Poon@Sun.COM sctp_stack_t *sctps = (sctp_stack_t *)arg;
155*12869SKacheong.Poon@Sun.COM int64_t tot_assoc = 0;
156*12869SKacheong.Poon@Sun.COM int i;
157*12869SKacheong.Poon@Sun.COM extern pgcnt_t lotsfree, needfree;
158*12869SKacheong.Poon@Sun.COM
159*12869SKacheong.Poon@Sun.COM for (i = 0; i < sctps->sctps_sc_cnt; i++)
160*12869SKacheong.Poon@Sun.COM tot_assoc += sctps->sctps_sc[i]->sctp_sc_assoc_cnt;
161*12869SKacheong.Poon@Sun.COM
162*12869SKacheong.Poon@Sun.COM /*
163*12869SKacheong.Poon@Sun.COM * This happens only when a stack is going away. sctps_reclaim_tid
164*12869SKacheong.Poon@Sun.COM * should not be reset to 0 when returning in this case.
165*12869SKacheong.Poon@Sun.COM */
166*12869SKacheong.Poon@Sun.COM mutex_enter(&sctps->sctps_reclaim_lock);
167*12869SKacheong.Poon@Sun.COM if (!sctps->sctps_reclaim) {
168*12869SKacheong.Poon@Sun.COM mutex_exit(&sctps->sctps_reclaim_lock);
169*12869SKacheong.Poon@Sun.COM return;
170*12869SKacheong.Poon@Sun.COM }
171*12869SKacheong.Poon@Sun.COM
172*12869SKacheong.Poon@Sun.COM if ((freemem >= lotsfree + needfree) || tot_assoc < maxusers) {
173*12869SKacheong.Poon@Sun.COM sctps->sctps_reclaim = B_FALSE;
174*12869SKacheong.Poon@Sun.COM sctps->sctps_reclaim_tid = 0;
175*12869SKacheong.Poon@Sun.COM } else {
176*12869SKacheong.Poon@Sun.COM /* Stay in defensive mode and restart the timer */
177*12869SKacheong.Poon@Sun.COM sctps->sctps_reclaim_tid = timeout(sctp_reclaim_timer,
178*12869SKacheong.Poon@Sun.COM sctps, MSEC_TO_TICK(sctps->sctps_reclaim_period));
179*12869SKacheong.Poon@Sun.COM }
180*12869SKacheong.Poon@Sun.COM mutex_exit(&sctps->sctps_reclaim_lock);
181*12869SKacheong.Poon@Sun.COM }
182*12869SKacheong.Poon@Sun.COM
183*12869SKacheong.Poon@Sun.COM /*
184*12869SKacheong.Poon@Sun.COM * Kmem reclaim call back function. When the system is under memory
185*12869SKacheong.Poon@Sun.COM * pressure, we set the SCTP stack variable sctps_reclaim to true. This
186*12869SKacheong.Poon@Sun.COM * variable is reset to false after sctps_reclaim_period msecs. During this
187*12869SKacheong.Poon@Sun.COM * period, SCTP will be more aggressive in aborting connections not making
188*12869SKacheong.Poon@Sun.COM * progress, meaning retransmitting for shorter time (sctp_pa_early_abort/
189*12869SKacheong.Poon@Sun.COM * sctp_pp_early_abort number of strikes).
190*12869SKacheong.Poon@Sun.COM */
191*12869SKacheong.Poon@Sun.COM /* ARGSUSED */
192*12869SKacheong.Poon@Sun.COM void
sctp_conn_reclaim(void * arg)193*12869SKacheong.Poon@Sun.COM sctp_conn_reclaim(void *arg)
194*12869SKacheong.Poon@Sun.COM {
195*12869SKacheong.Poon@Sun.COM netstack_handle_t nh;
196*12869SKacheong.Poon@Sun.COM netstack_t *ns;
197*12869SKacheong.Poon@Sun.COM sctp_stack_t *sctps;
198*12869SKacheong.Poon@Sun.COM extern pgcnt_t lotsfree, needfree;
199*12869SKacheong.Poon@Sun.COM
200*12869SKacheong.Poon@Sun.COM if (!sctp_do_reclaim)
201*12869SKacheong.Poon@Sun.COM return;
202*12869SKacheong.Poon@Sun.COM
203*12869SKacheong.Poon@Sun.COM /*
204*12869SKacheong.Poon@Sun.COM * The reclaim function may be called even when the system is not
205*12869SKacheong.Poon@Sun.COM * really under memory pressure.
206*12869SKacheong.Poon@Sun.COM */
207*12869SKacheong.Poon@Sun.COM if (freemem >= lotsfree + needfree)
208*12869SKacheong.Poon@Sun.COM return;
209*12869SKacheong.Poon@Sun.COM
210*12869SKacheong.Poon@Sun.COM netstack_next_init(&nh);
211*12869SKacheong.Poon@Sun.COM while ((ns = netstack_next(&nh)) != NULL) {
212*12869SKacheong.Poon@Sun.COM int i;
213*12869SKacheong.Poon@Sun.COM int64_t tot_assoc = 0;
214*12869SKacheong.Poon@Sun.COM
215*12869SKacheong.Poon@Sun.COM /*
216*12869SKacheong.Poon@Sun.COM * During boot time, the first netstack_t is created and
217*12869SKacheong.Poon@Sun.COM * initialized before SCTP has registered with the netstack
218*12869SKacheong.Poon@Sun.COM * framework. If this reclaim function is called before SCTP
219*12869SKacheong.Poon@Sun.COM * has finished its initialization, netstack_next() will
220*12869SKacheong.Poon@Sun.COM * return the first netstack_t (since its netstack_flags is
221*12869SKacheong.Poon@Sun.COM * not NSF_UNINIT). And its netstack_sctp will be NULL. We
222*12869SKacheong.Poon@Sun.COM * need to catch it.
223*12869SKacheong.Poon@Sun.COM *
224*12869SKacheong.Poon@Sun.COM * All subsequent netstack_t creation will not have this
225*12869SKacheong.Poon@Sun.COM * problem since the initialization is not finished until SCTP
226*12869SKacheong.Poon@Sun.COM * has finished its own sctp_stack_t initialization. Hence
227*12869SKacheong.Poon@Sun.COM * netstack_next() will not return one with NULL netstack_sctp.
228*12869SKacheong.Poon@Sun.COM */
229*12869SKacheong.Poon@Sun.COM if ((sctps = ns->netstack_sctp) == NULL) {
230*12869SKacheong.Poon@Sun.COM netstack_rele(ns);
231*12869SKacheong.Poon@Sun.COM continue;
232*12869SKacheong.Poon@Sun.COM }
233*12869SKacheong.Poon@Sun.COM
234*12869SKacheong.Poon@Sun.COM /*
235*12869SKacheong.Poon@Sun.COM * Even if the system is under memory pressure, the reason may
236*12869SKacheong.Poon@Sun.COM * not be because of SCTP activity. Check the number of
237*12869SKacheong.Poon@Sun.COM * associations in each stack. If the number exceeds the
238*12869SKacheong.Poon@Sun.COM * threshold (maxusers), turn on defensive mode.
239*12869SKacheong.Poon@Sun.COM */
240*12869SKacheong.Poon@Sun.COM for (i = 0; i < sctps->sctps_sc_cnt; i++)
241*12869SKacheong.Poon@Sun.COM tot_assoc += sctps->sctps_sc[i]->sctp_sc_assoc_cnt;
242*12869SKacheong.Poon@Sun.COM if (tot_assoc < maxusers) {
243*12869SKacheong.Poon@Sun.COM netstack_rele(ns);
244*12869SKacheong.Poon@Sun.COM continue;
245*12869SKacheong.Poon@Sun.COM }
246*12869SKacheong.Poon@Sun.COM
247*12869SKacheong.Poon@Sun.COM mutex_enter(&sctps->sctps_reclaim_lock);
248*12869SKacheong.Poon@Sun.COM if (!sctps->sctps_reclaim) {
249*12869SKacheong.Poon@Sun.COM sctps->sctps_reclaim = B_TRUE;
250*12869SKacheong.Poon@Sun.COM sctps->sctps_reclaim_tid = timeout(sctp_reclaim_timer,
251*12869SKacheong.Poon@Sun.COM sctps, MSEC_TO_TICK(sctps->sctps_reclaim_period));
252*12869SKacheong.Poon@Sun.COM SCTP_KSTAT(sctps, sctp_reclaim_cnt);
253*12869SKacheong.Poon@Sun.COM }
254*12869SKacheong.Poon@Sun.COM mutex_exit(&sctps->sctps_reclaim_lock);
255*12869SKacheong.Poon@Sun.COM netstack_rele(ns);
256*12869SKacheong.Poon@Sun.COM }
257*12869SKacheong.Poon@Sun.COM netstack_next_fini(&nh);
258*12869SKacheong.Poon@Sun.COM }
259*12869SKacheong.Poon@Sun.COM
260*12869SKacheong.Poon@Sun.COM /*
261*12869SKacheong.Poon@Sun.COM * When a CPU is added, we need to allocate the per CPU stats struct.
262*12869SKacheong.Poon@Sun.COM */
263*12869SKacheong.Poon@Sun.COM void
sctp_stack_cpu_add(sctp_stack_t * sctps,processorid_t cpu_seqid)264*12869SKacheong.Poon@Sun.COM sctp_stack_cpu_add(sctp_stack_t *sctps, processorid_t cpu_seqid)
265*12869SKacheong.Poon@Sun.COM {
266*12869SKacheong.Poon@Sun.COM int i;
267*12869SKacheong.Poon@Sun.COM
268*12869SKacheong.Poon@Sun.COM if (cpu_seqid < sctps->sctps_sc_cnt)
269*12869SKacheong.Poon@Sun.COM return;
270*12869SKacheong.Poon@Sun.COM for (i = sctps->sctps_sc_cnt; i <= cpu_seqid; i++) {
271*12869SKacheong.Poon@Sun.COM ASSERT(sctps->sctps_sc[i] == NULL);
272*12869SKacheong.Poon@Sun.COM sctps->sctps_sc[i] = kmem_zalloc(sizeof (sctp_stats_cpu_t),
273*12869SKacheong.Poon@Sun.COM KM_SLEEP);
274*12869SKacheong.Poon@Sun.COM }
275*12869SKacheong.Poon@Sun.COM membar_producer();
276*12869SKacheong.Poon@Sun.COM sctps->sctps_sc_cnt = cpu_seqid + 1;
277*12869SKacheong.Poon@Sun.COM }
278