1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24 /* Copyright (c) 1990 Mentat Inc. */
25
26 #include <inet/ip.h>
27 #include <inet/tcp_impl.h>
28 #include <sys/multidata.h>
29 #include <sys/sunddi.h>
30
/*
 * Upper bounds on the TCP maximum segment size, used as limits and
 * defaults by the MSS entries of tcp_propinfo_tbl[].
 *
 * Each bound is the largest IP datagram size minus the size of the fixed
 * IP header structure for that address family and the fixed TCP header
 * structure.
 */
/* Max size IP datagram is 64k - 1 */
#define	TCP_MSS_MAX_IPV4 (IP_MAXPACKET - (sizeof (ipha_t) + sizeof (tcpha_t)))
#define	TCP_MSS_MAX_IPV6 (IP_MAXPACKET - (sizeof (ip6_t) + sizeof (tcpha_t)))

/*
 * Max of the above
 *
 * NOTE(review): this relies on sizeof (ipha_t) <= sizeof (ip6_t), so that
 * the IPv4 bound is the larger of the two -- confirm against the headers.
 */
#define	TCP_MSS_MAX	TCP_MSS_MAX_IPV4

/*
 * Byte counts referenced by the "send_maxbuf", "_xmit_lowat" and
 * "recv_maxbuf" entries of tcp_propinfo_tbl[].
 */
#define	TCP_XMIT_LOWATER	4096
#define	TCP_XMIT_HIWATER	49152
#define	TCP_RECV_LOWATER	2048
#define	TCP_RECV_HIWATER	128000
42
43 /*
44 * Set the RFC 1948 pass phrase
45 */
46 /* ARGSUSED */
47 static int
tcp_set_1948phrase(void * cbarg,cred_t * cr,mod_prop_info_t * pinfo,const char * ifname,const void * pr_val,uint_t flags)48 tcp_set_1948phrase(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo,
49 const char *ifname, const void* pr_val, uint_t flags)
50 {
51 tcp_stack_t *tcps = (tcp_stack_t *)cbarg;
52
53 if (flags & MOD_PROP_DEFAULT)
54 return (ENOTSUP);
55
56 /*
57 * Basically, value contains a new pass phrase. Pass it along!
58 */
59 tcp_iss_key_init((uint8_t *)pr_val, strlen(pr_val), tcps);
60 return (0);
61 }
62
63 /*
64 * returns the current list of listener limit configuration.
65 */
66 /* ARGSUSED */
67 static int
tcp_listener_conf_get(void * cbarg,mod_prop_info_t * pinfo,const char * ifname,void * val,uint_t psize,uint_t flags)68 tcp_listener_conf_get(void *cbarg, mod_prop_info_t *pinfo, const char *ifname,
69 void *val, uint_t psize, uint_t flags)
70 {
71 tcp_stack_t *tcps = (tcp_stack_t *)cbarg;
72 tcp_listener_t *tl;
73 char *pval = val;
74 size_t nbytes = 0, tbytes = 0;
75 uint_t size;
76 int err = 0;
77
78 bzero(pval, psize);
79 size = psize;
80
81 if (flags & (MOD_PROP_DEFAULT|MOD_PROP_PERM|MOD_PROP_POSSIBLE))
82 return (0);
83
84 mutex_enter(&tcps->tcps_listener_conf_lock);
85 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
86 tl = list_next(&tcps->tcps_listener_conf, tl)) {
87 if (psize == size)
88 nbytes = snprintf(pval, size, "%d:%d", tl->tl_port,
89 tl->tl_ratio);
90 else
91 nbytes = snprintf(pval, size, ",%d:%d", tl->tl_port,
92 tl->tl_ratio);
93 size -= nbytes;
94 pval += nbytes;
95 tbytes += nbytes;
96 if (tbytes >= psize) {
97 /* Buffer overflow, stop copying information */
98 err = ENOBUFS;
99 break;
100 }
101 }
102
103 mutex_exit(&tcps->tcps_listener_conf_lock);
104 return (err);
105 }
106
107 /*
108 * add a new listener limit configuration.
109 */
110 /* ARGSUSED */
111 static int
tcp_listener_conf_add(void * cbarg,cred_t * cr,mod_prop_info_t * pinfo,const char * ifname,const void * pval,uint_t flags)112 tcp_listener_conf_add(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo,
113 const char *ifname, const void* pval, uint_t flags)
114 {
115 tcp_listener_t *new_tl;
116 tcp_listener_t *tl;
117 long lport;
118 long ratio;
119 char *colon;
120 tcp_stack_t *tcps = (tcp_stack_t *)cbarg;
121
122 if (flags & MOD_PROP_DEFAULT)
123 return (ENOTSUP);
124
125 if (ddi_strtol(pval, &colon, 10, &lport) != 0 || lport <= 0 ||
126 lport > USHRT_MAX || *colon != ':') {
127 return (EINVAL);
128 }
129 if (ddi_strtol(colon + 1, NULL, 10, &ratio) != 0 || ratio <= 0)
130 return (EINVAL);
131
132 mutex_enter(&tcps->tcps_listener_conf_lock);
133 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
134 tl = list_next(&tcps->tcps_listener_conf, tl)) {
135 /* There is an existing entry, so update its ratio value. */
136 if (tl->tl_port == lport) {
137 tl->tl_ratio = ratio;
138 mutex_exit(&tcps->tcps_listener_conf_lock);
139 return (0);
140 }
141 }
142
143 if ((new_tl = kmem_alloc(sizeof (tcp_listener_t), KM_NOSLEEP)) ==
144 NULL) {
145 mutex_exit(&tcps->tcps_listener_conf_lock);
146 return (ENOMEM);
147 }
148
149 new_tl->tl_port = lport;
150 new_tl->tl_ratio = ratio;
151 list_insert_tail(&tcps->tcps_listener_conf, new_tl);
152 mutex_exit(&tcps->tcps_listener_conf_lock);
153 return (0);
154 }
155
156 /*
157 * remove a listener limit configuration.
158 */
159 /* ARGSUSED */
160 static int
tcp_listener_conf_del(void * cbarg,cred_t * cr,mod_prop_info_t * pinfo,const char * ifname,const void * pval,uint_t flags)161 tcp_listener_conf_del(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo,
162 const char *ifname, const void* pval, uint_t flags)
163 {
164 tcp_listener_t *tl;
165 long lport;
166 tcp_stack_t *tcps = (tcp_stack_t *)cbarg;
167
168 if (flags & MOD_PROP_DEFAULT)
169 return (ENOTSUP);
170
171 if (ddi_strtol(pval, NULL, 10, &lport) != 0 || lport <= 0 ||
172 lport > USHRT_MAX) {
173 return (EINVAL);
174 }
175 mutex_enter(&tcps->tcps_listener_conf_lock);
176 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
177 tl = list_next(&tcps->tcps_listener_conf, tl)) {
178 if (tl->tl_port == lport) {
179 list_remove(&tcps->tcps_listener_conf, tl);
180 mutex_exit(&tcps->tcps_listener_conf_lock);
181 kmem_free(tl, sizeof (tcp_listener_t));
182 return (0);
183 }
184 }
185 mutex_exit(&tcps->tcps_listener_conf_lock);
186 return (ESRCH);
187 }
188
/*
 * All of these are alterable, within the min/max values given, at run time.
 *
 * Note: All those tunables which do not start with "_" are Committed and
 * therefore are public. See PSARC 2010/080.
 *
 * Each entry lists, in order: the property name, the protocol it belongs
 * to (always MOD_PROTO_TCP here), the set callback, the get callback, and
 * two brace-enclosed value groups.  A NULL callback means the property has
 * no setter (or no getter).
 *
 * NOTE(review): for the mod_set_uint32 entries the first group looks like
 * {min, max, current} and the second like {default}; for the boolean
 * entries each group holds a single value.  Confirm against the definition
 * of mod_prop_info_t.
 *
 * NOTE(review): the "tunable - N" markers give the zero-based index of the
 * entry that follows them, which suggests that entries are referenced by
 * position elsewhere.  Confirm before inserting, removing or reordering
 * entries.
 */
mod_prop_info_t tcp_propinfo_tbl[] = {
	/* tunable - 0 */
	{ "_time_wait_interval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1*SECONDS, 10*MINUTES, 1*MINUTES}, {1*MINUTES} },

	{ "_conn_req_max_q", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, UINT32_MAX, 128}, {128} },

	{ "_conn_req_max_q0", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, UINT32_MAX, 1024}, {1024} },

	{ "_conn_req_min", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, 1024, 1}, {1} },

	{ "_conn_grace_period", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0*MS, 20*SECONDS, 0*MS}, {0*MS} },

	{ "_cwnd_max", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {128, (1<<30), 1024*1024}, {1024*1024} },

	{ "_debug", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 10, 0}, {0} },

	{ "smallest_nonpriv_port", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1024, (32*1024), 1024}, {1024} },

	{ "_ip_abort_cinterval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1*SECONDS, UINT32_MAX, 3*MINUTES}, {3*MINUTES} },

	{ "_ip_abort_linterval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1*SECONDS, UINT32_MAX, 3*MINUTES}, {3*MINUTES} },

	/* tunable - 10 */
	{ "_ip_abort_interval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {500*MS, UINT32_MAX, 5*MINUTES}, {5*MINUTES} },

	{ "_ip_notify_cinterval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1*SECONDS, UINT32_MAX, 10*SECONDS},
	    {10*SECONDS} },

	{ "_ip_notify_interval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {500*MS, UINT32_MAX, 10*SECONDS}, {10*SECONDS} },

	{ "_ipv4_ttl", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, 255, 64}, {64} },

	{ "_keepalive_interval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {10*SECONDS, 10*DAYS, 2*HOURS}, {2*HOURS} },

	{ "_maxpsz_multiplier", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 100, 10}, {10} },

	{ "_mss_def_ipv4", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, TCP_MSS_MAX_IPV4, 536}, {536} },

	{ "_mss_max_ipv4", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, TCP_MSS_MAX_IPV4, TCP_MSS_MAX_IPV4},
	    {TCP_MSS_MAX_IPV4} },

	{ "_mss_min", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, TCP_MSS_MAX, 108}, {108} },

	{ "_naglim_def", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, (64*1024)-1, (4*1024)-1}, {(4*1024)-1} },

	/* tunable - 20 */
	{ "_rexmit_interval_initial", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1*MS, 20*SECONDS, 1*SECONDS}, {1*SECONDS} },

	{ "_rexmit_interval_max", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1*MS, 2*HOURS, 60*SECONDS}, {60*SECONDS} },

	{ "_rexmit_interval_min", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1*MS, 2*HOURS, 400*MS}, {400*MS} },

	{ "_deferred_ack_interval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1*MS, 1*MINUTES, 100*MS}, {100*MS} },

	{ "_snd_lowat_fraction", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 16, 0}, {0} },

	{ "_dupack_fast_retransmit", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, 10000, 3}, {3} },

	{ "_ignore_path_mtu", MOD_PROTO_TCP,
	    mod_set_boolean, mod_get_boolean,
	    {B_FALSE}, {B_FALSE} },

	{ "smallest_anon_port", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1024, ULP_MAX_PORT, 32*1024}, {32*1024} },

	{ "largest_anon_port", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1024, ULP_MAX_PORT, ULP_MAX_PORT},
	    {ULP_MAX_PORT} },

	{ "send_maxbuf", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {TCP_XMIT_LOWATER, (1<<30), TCP_XMIT_HIWATER},
	    {TCP_XMIT_HIWATER} },

	/* tunable - 30 */
	{ "_xmit_lowat", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {TCP_XMIT_LOWATER, (1<<30), TCP_XMIT_LOWATER},
	    {TCP_XMIT_LOWATER} },

	{ "recv_maxbuf", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {TCP_RECV_LOWATER, (1<<30), TCP_RECV_HIWATER},
	    {TCP_RECV_HIWATER} },

	{ "_recv_hiwat_minmss", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, 65536, 4}, {4} },

	{ "_fin_wait_2_flush_interval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1*SECONDS, 2*HOURS, 60*SECONDS},
	    {60*SECONDS} },

	{ "_max_buf", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {8192, (1<<30), 1024*1024}, {1024*1024} },

	/*
	 * Question: What default value should I set for tcp_strong_iss?
	 */
	{ "_strong_iss", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 2, 1}, {1} },

	{ "_rtt_updates", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 65536, 20}, {20} },

	{ "_wscale_always", MOD_PROTO_TCP,
	    mod_set_boolean, mod_get_boolean,
	    {B_TRUE}, {B_TRUE} },

	{ "_tstamp_always", MOD_PROTO_TCP,
	    mod_set_boolean, mod_get_boolean,
	    {B_FALSE}, {B_FALSE} },

	{ "_tstamp_if_wscale", MOD_PROTO_TCP,
	    mod_set_boolean, mod_get_boolean,
	    {B_TRUE}, {B_TRUE} },

	/* tunable - 40 */
	{ "_rexmit_interval_extra", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0*MS, 2*HOURS, 0*MS}, {0*MS} },

	{ "_deferred_acks_max", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 16, 2}, {2} },

	{ "_slow_start_after_idle", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, 16384, 4}, {4} },

	{ "_slow_start_initial", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, 4, 4}, {4} },

	{ "sack", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 2, 2}, {2} },

	{ "_ipv6_hoplimit", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS},
	    {IPV6_DEFAULT_HOPS} },

	{ "_mss_def_ipv6", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, TCP_MSS_MAX_IPV6, 1220}, {1220} },

	{ "_mss_max_ipv6", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, TCP_MSS_MAX_IPV6, TCP_MSS_MAX_IPV6},
	    {TCP_MSS_MAX_IPV6} },

	{ "_rev_src_routes", MOD_PROTO_TCP,
	    mod_set_boolean, mod_get_boolean,
	    {B_FALSE}, {B_FALSE} },

	{ "_local_dack_interval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {10*MS, 500*MS, 50*MS}, {50*MS} },

	/* tunable - 50 */
	{ "_local_dacks_max", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 16, 8}, {8} },

	{ "ecn", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 2, 1}, {1} },

	{ "_rst_sent_rate_enabled", MOD_PROTO_TCP,
	    mod_set_boolean, mod_get_boolean,
	    {B_TRUE}, {B_TRUE} },

	{ "_rst_sent_rate", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, UINT32_MAX, 40}, {40} },

	{ "_push_timer_interval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 100*MS, 50*MS}, {50*MS} },

	{ "_use_smss_as_mss_opt", MOD_PROTO_TCP,
	    mod_set_boolean, mod_get_boolean,
	    {B_FALSE}, {B_FALSE} },

	{ "_keepalive_abort_interval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, UINT32_MAX, 8*MINUTES}, {8*MINUTES} },

	/*
	 * tcp_wroff_xtra is the extra space in front of TCP/IP header for link
	 * layer header.  It has to be a multiple of 8.
	 *
	 * This is the only entry set through mod_set_aligned rather than
	 * mod_set_uint32.
	 */
	{ "_wroff_xtra", MOD_PROTO_TCP,
	    mod_set_aligned, mod_get_uint32,
	    {0, 256, 32}, {32} },

	{ "_dev_flow_ctl", MOD_PROTO_TCP,
	    mod_set_boolean, mod_get_boolean,
	    {B_FALSE}, {B_FALSE} },

	{ "_reass_timeout", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, UINT32_MAX, 100*SECONDS}, {100*SECONDS} },

	/* tunable - 60 */
	{ "extra_priv_ports", MOD_PROTO_TCP,
	    mod_set_extra_privports, mod_get_extra_privports,
	    {1, ULP_MAX_PORT, 0}, {0} },

	/*
	 * The remaining properties are serviced by callbacks defined in this
	 * file.  The pass phrase and the listener add/del properties have no
	 * get callback; the listener list property has no set callback.
	 */
	{ "_1948_phrase", MOD_PROTO_TCP,
	    tcp_set_1948phrase, NULL, {0}, {0} },

	{ "_listener_limit_conf", MOD_PROTO_TCP,
	    NULL, tcp_listener_conf_get, {0}, {0} },

	{ "_listener_limit_conf_add", MOD_PROTO_TCP,
	    tcp_listener_conf_add, NULL, {0}, {0} },

	{ "_listener_limit_conf_del", MOD_PROTO_TCP,
	    tcp_listener_conf_del, NULL, {0}, {0} },

	/* "?" has only a get callback, mod_get_allprop. */
	{ "?", MOD_PROTO_TCP, NULL, mod_get_allprop, {0}, {0} },

	/* Terminating entry: NULL name and no callbacks. */
	{ NULL, 0, NULL, NULL, {0}, {0} }
};
479
/*
 * Size of tcp_propinfo_tbl[] as computed by A_CNT().
 *
 * NOTE(review): A_CNT() is applied to the whole array, so this presumably
 * counts the terminating NULL entry as well -- confirm against the
 * definition of A_CNT and the users of this count.
 */
int tcp_propinfo_count = A_CNT(tcp_propinfo_tbl);
481