1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 #include <sys/types.h>
26 #include <sys/stream.h>
27 #define _SUN_TPI_VERSION 2
28 #include <sys/tihdr.h>
29 #include <sys/socket.h>
30 #include <sys/xti_xtiopt.h>
31 #include <sys/xti_inet.h>
32 #include <sys/policy.h>
33
34 #include <inet/common.h>
35 #include <netinet/ip6.h>
36 #include <inet/ip.h>
37
38 #include <netinet/in.h>
39 #include <netinet/tcp.h>
40 #include <inet/optcom.h>
41 #include <inet/proto_set.h>
42 #include <inet/tcp_impl.h>
43
44 /*
45 * Table of all known options handled on a TCP protocol stack.
46 *
47 * Note: This table contains options processed by both TCP and IP levels
48 * and is the superset of options that can be performed on a TCP over IP
49 * stack.
50 */
51 opdes_t tcp_opt_arr[] = {
52
53 { SO_LINGER, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
54 sizeof (struct linger), 0 },
55
56 { SO_DEBUG, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
57 { SO_KEEPALIVE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
58 { SO_DONTROUTE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
59 { SO_USELOOPBACK, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
60 },
61 { SO_BROADCAST, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
62 { SO_REUSEADDR, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
63 { SO_OOBINLINE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
64 { SO_TYPE, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
65 { SO_SNDBUF, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
66 { SO_RCVBUF, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
67 { SO_SNDTIMEO, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
68 sizeof (struct timeval), 0 },
69 { SO_RCVTIMEO, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
70 sizeof (struct timeval), 0 },
71 { SO_DGRAM_ERRIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
72 },
73 { SO_SND_COPYAVOID, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
74 { SO_ANON_MLP, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
75 0 },
76 { SO_MAC_EXEMPT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
77 0 },
78 { SO_MAC_IMPLICIT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
79 0 },
80 { SO_ALLZONES, SOL_SOCKET, OA_R, OA_RW, OP_CONFIG, 0, sizeof (int),
81 0 },
82 { SO_EXCLBIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
83
84 { SO_DOMAIN, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
85
86 { SO_PROTOTYPE, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
87
88 { TCP_NODELAY, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
89 },
90 { TCP_MAXSEG, IPPROTO_TCP, OA_R, OA_R, OP_NP, 0, sizeof (uint_t),
91 536 },
92
93 { TCP_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
94 OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
95
96 { TCP_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
97 OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
98
99 { TCP_CONN_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
100 OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
101
102 { TCP_CONN_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
103 OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
104
105 { TCP_RECVDSTADDR, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
106 0 },
107
108 { TCP_ANONPRIVBIND, IPPROTO_TCP, OA_R, OA_RW, OP_PRIVPORT, 0,
109 sizeof (int), 0 },
110
111 { TCP_EXCLBIND, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
112 },
113
114 { TCP_INIT_CWND, IPPROTO_TCP, OA_RW, OA_RW, OP_CONFIG, 0,
115 sizeof (int), 0 },
116
117 { TCP_KEEPALIVE_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
118 sizeof (int), 0 },
119
120 { TCP_KEEPALIVE_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
121 sizeof (int), 0 },
122
123 { TCP_CORK, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
124
125 { TCP_RTO_INITIAL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
126
127 { TCP_RTO_MIN, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
128
129 { TCP_RTO_MAX, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
130
131 { TCP_LINGER2, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
132
133 { IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP,
134 (OP_VARLEN|OP_NODEFAULT),
135 IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
136 { T_IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP,
137 (OP_VARLEN|OP_NODEFAULT),
138 IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
139
140 { IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
141 { T_IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
142 { IP_TTL, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
143 sizeof (int), -1 /* not initialized */ },
144
145 { IP_SEC_OPT, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
146 sizeof (ipsec_req_t), -1 /* not initialized */ },
147
148 { IP_BOUND_IF, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0,
149 sizeof (int), 0 /* no ifindex */ },
150
151 { IP_UNSPEC_SRC, IPPROTO_IP, OA_R, OA_RW, OP_RAW, 0,
152 sizeof (int), 0 },
153
154 { IPV6_UNICAST_HOPS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
155 sizeof (int), -1 /* not initialized */ },
156
157 { IPV6_BOUND_IF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
158 sizeof (int), 0 /* no ifindex */ },
159
160 { IP_DONTFRAG, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
161
162 { IP_NEXTHOP, IPPROTO_IP, OA_R, OA_RW, OP_CONFIG, 0,
163 sizeof (in_addr_t), -1 /* not initialized */ },
164
165 { IPV6_UNSPEC_SRC, IPPROTO_IPV6, OA_R, OA_RW, OP_RAW, 0,
166 sizeof (int), 0 },
167
168 { IPV6_PKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
169 (OP_NODEFAULT|OP_VARLEN),
170 sizeof (struct in6_pktinfo), -1 /* not initialized */ },
171 { IPV6_NEXTHOP, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
172 OP_NODEFAULT,
173 sizeof (sin6_t), -1 /* not initialized */ },
174 { IPV6_HOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
175 (OP_VARLEN|OP_NODEFAULT), 255*8,
176 -1 /* not initialized */ },
177 { IPV6_DSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
178 (OP_VARLEN|OP_NODEFAULT), 255*8,
179 -1 /* not initialized */ },
180 { IPV6_RTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
181 (OP_VARLEN|OP_NODEFAULT), 255*8,
182 -1 /* not initialized */ },
183 { IPV6_RTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
184 (OP_VARLEN|OP_NODEFAULT), 255*8,
185 -1 /* not initialized */ },
186 { IPV6_TCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
187 OP_NODEFAULT,
188 sizeof (int), -1 /* not initialized */ },
189 { IPV6_PATHMTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
190 OP_NODEFAULT,
191 sizeof (struct ip6_mtuinfo), -1 /* not initialized */ },
192 { IPV6_DONTFRAG, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
193 sizeof (int), 0 },
194 { IPV6_USE_MIN_MTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
195 sizeof (int), 0 },
196 { IPV6_V6ONLY, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
197 sizeof (int), 0 },
198
199 /* Enable receipt of ancillary data */
200 { IPV6_RECVPKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
201 sizeof (int), 0 },
202 { IPV6_RECVHOPLIMIT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
203 sizeof (int), 0 },
204 { IPV6_RECVHOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
205 sizeof (int), 0 },
206 { _OLD_IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
207 sizeof (int), 0 },
208 { IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
209 sizeof (int), 0 },
210 { IPV6_RECVRTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
211 sizeof (int), 0 },
212 { IPV6_RECVRTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
213 sizeof (int), 0 },
214 { IPV6_RECVTCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
215 sizeof (int), 0 },
216
217 { IPV6_SEC_OPT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
218 sizeof (ipsec_req_t), -1 /* not initialized */ },
219 { IPV6_SRC_PREFERENCES, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
220 sizeof (uint32_t), IPV6_PREFER_SRC_DEFAULT },
221 };
222
223 /*
224 * Table of all supported levels
225 * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
226 * any supported options so we need this info separately.
227 *
228 * This is needed only for topmost tpi providers and is used only by
229 * XTI interfaces.
230 */
231 optlevel_t tcp_valid_levels_arr[] = {
232 XTI_GENERIC,
233 SOL_SOCKET,
234 IPPROTO_TCP,
235 IPPROTO_IP,
236 IPPROTO_IPV6
237 };
238
239
240 #define TCP_OPT_ARR_CNT A_CNT(tcp_opt_arr)
241 #define TCP_VALID_LEVELS_CNT A_CNT(tcp_valid_levels_arr)
242
243 uint_t tcp_max_optsize; /* initialized when TCP driver is loaded */
244
245 /*
246 * Initialize option database object for TCP
247 *
248 * This object represents database of options to search passed to
249 * {sock,tpi}optcom_req() interface routine to take care of option
250 * management and associated methods.
251 */
252
253 optdb_obj_t tcp_opt_obj = {
254 tcp_opt_default, /* TCP default value function pointer */
255 tcp_tpi_opt_get, /* TCP get function pointer */
256 tcp_tpi_opt_set, /* TCP set function pointer */
257 TCP_OPT_ARR_CNT, /* TCP option database count of entries */
258 tcp_opt_arr, /* TCP option database */
259 TCP_VALID_LEVELS_CNT, /* TCP valid level count of entries */
260 tcp_valid_levels_arr /* TCP valid level array */
261 };
262
/*
 * Upper bound on the TCP initial congestion window (start/restart).
 * tcp_opt_set() rejects TCP_INIT_CWND values above tcp_max_init_cwnd.
 */
#define	TCP_MAX_INIT_CWND	16

static int	tcp_max_init_cwnd = TCP_MAX_INIT_CWND;
267
268 /*
269 * Some TCP options can be "set" by requesting them in the option
270 * buffer. This is needed for XTI feature test though we do not
271 * allow it in general. We interpret that this mechanism is more
272 * applicable to OSI protocols and need not be allowed in general.
273 * This routine filters out options for which it is not allowed (most)
274 * and lets through those (few) for which it is. [ The XTI interface
275 * test suite specifics will imply that any XTI_GENERIC level XTI_* if
276 * ever implemented will have to be allowed here ].
277 */
278 static boolean_t
tcp_allow_connopt_set(int level,int name)279 tcp_allow_connopt_set(int level, int name)
280 {
281
282 switch (level) {
283 case IPPROTO_TCP:
284 switch (name) {
285 case TCP_NODELAY:
286 return (B_TRUE);
287 default:
288 return (B_FALSE);
289 }
290 /*NOTREACHED*/
291 default:
292 return (B_FALSE);
293 }
294 /*NOTREACHED*/
295 }
296
297 /*
298 * This routine gets default values of certain options whose default
299 * values are maintained by protocol specific code
300 */
301 /* ARGSUSED */
302 int
tcp_opt_default(queue_t * q,int level,int name,uchar_t * ptr)303 tcp_opt_default(queue_t *q, int level, int name, uchar_t *ptr)
304 {
305 int32_t *i1 = (int32_t *)ptr;
306 tcp_stack_t *tcps = Q_TO_TCP(q)->tcp_tcps;
307
308 switch (level) {
309 case IPPROTO_TCP:
310 switch (name) {
311 case TCP_NOTIFY_THRESHOLD:
312 *i1 = tcps->tcps_ip_notify_interval;
313 break;
314 case TCP_ABORT_THRESHOLD:
315 *i1 = tcps->tcps_ip_abort_interval;
316 break;
317 case TCP_CONN_NOTIFY_THRESHOLD:
318 *i1 = tcps->tcps_ip_notify_cinterval;
319 break;
320 case TCP_CONN_ABORT_THRESHOLD:
321 *i1 = tcps->tcps_ip_abort_cinterval;
322 break;
323 default:
324 return (-1);
325 }
326 break;
327 case IPPROTO_IP:
328 switch (name) {
329 case IP_TTL:
330 *i1 = tcps->tcps_ipv4_ttl;
331 break;
332 default:
333 return (-1);
334 }
335 break;
336 case IPPROTO_IPV6:
337 switch (name) {
338 case IPV6_UNICAST_HOPS:
339 *i1 = tcps->tcps_ipv6_hoplimit;
340 break;
341 default:
342 return (-1);
343 }
344 break;
345 default:
346 return (-1);
347 }
348 return (sizeof (int));
349 }
350
351 /*
352 * TCP routine to get the values of options.
353 */
354 int
tcp_opt_get(conn_t * connp,int level,int name,uchar_t * ptr)355 tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
356 {
357 int *i1 = (int *)ptr;
358 tcp_t *tcp = connp->conn_tcp;
359 conn_opt_arg_t coas;
360 int retval;
361
362 coas.coa_connp = connp;
363 coas.coa_ixa = connp->conn_ixa;
364 coas.coa_ipp = &connp->conn_xmit_ipp;
365 coas.coa_ancillary = B_FALSE;
366 coas.coa_changed = 0;
367
368 switch (level) {
369 case SOL_SOCKET:
370 switch (name) {
371 case SO_SND_COPYAVOID:
372 *i1 = tcp->tcp_snd_zcopy_on ?
373 SO_SND_COPYAVOID : 0;
374 return (sizeof (int));
375 case SO_ACCEPTCONN:
376 *i1 = (tcp->tcp_state == TCPS_LISTEN);
377 return (sizeof (int));
378 }
379 break;
380 case IPPROTO_TCP:
381 switch (name) {
382 case TCP_NODELAY:
383 *i1 = (tcp->tcp_naglim == 1) ? TCP_NODELAY : 0;
384 return (sizeof (int));
385 case TCP_MAXSEG:
386 *i1 = tcp->tcp_mss;
387 return (sizeof (int));
388 case TCP_NOTIFY_THRESHOLD:
389 *i1 = (int)tcp->tcp_first_timer_threshold;
390 return (sizeof (int));
391 case TCP_ABORT_THRESHOLD:
392 *i1 = tcp->tcp_second_timer_threshold;
393 return (sizeof (int));
394 case TCP_CONN_NOTIFY_THRESHOLD:
395 *i1 = tcp->tcp_first_ctimer_threshold;
396 return (sizeof (int));
397 case TCP_CONN_ABORT_THRESHOLD:
398 *i1 = tcp->tcp_second_ctimer_threshold;
399 return (sizeof (int));
400 case TCP_INIT_CWND:
401 *i1 = tcp->tcp_init_cwnd;
402 return (sizeof (int));
403 case TCP_KEEPALIVE_THRESHOLD:
404 *i1 = tcp->tcp_ka_interval;
405 return (sizeof (int));
406 case TCP_KEEPALIVE_ABORT_THRESHOLD:
407 *i1 = tcp->tcp_ka_abort_thres;
408 return (sizeof (int));
409 case TCP_CORK:
410 *i1 = tcp->tcp_cork;
411 return (sizeof (int));
412 case TCP_RTO_INITIAL:
413 *i1 = tcp->tcp_rto_initial;
414 return (sizeof (uint32_t));
415 case TCP_RTO_MIN:
416 *i1 = tcp->tcp_rto_min;
417 return (sizeof (uint32_t));
418 case TCP_RTO_MAX:
419 *i1 = tcp->tcp_rto_max;
420 return (sizeof (uint32_t));
421 case TCP_LINGER2:
422 *i1 = tcp->tcp_fin_wait_2_flush_interval / SECONDS;
423 return (sizeof (int));
424 }
425 break;
426 case IPPROTO_IP:
427 if (connp->conn_family != AF_INET)
428 return (-1);
429 switch (name) {
430 case IP_OPTIONS:
431 case T_IP_OPTIONS:
432 /* Caller ensures enough space */
433 return (ip_opt_get_user(connp, ptr));
434 default:
435 break;
436 }
437 break;
438
439 case IPPROTO_IPV6:
440 /*
441 * IPPROTO_IPV6 options are only supported for sockets
442 * that are using IPv6 on the wire.
443 */
444 if (connp->conn_ipversion != IPV6_VERSION) {
445 return (-1);
446 }
447 switch (name) {
448 case IPV6_PATHMTU:
449 if (tcp->tcp_state < TCPS_ESTABLISHED)
450 return (-1);
451 break;
452 }
453 break;
454 }
455 mutex_enter(&connp->conn_lock);
456 retval = conn_opt_get(&coas, level, name, ptr);
457 mutex_exit(&connp->conn_lock);
458 return (retval);
459 }
460
461 /*
462 * We declare as 'int' rather than 'void' to satisfy pfi_t arg requirements.
463 * Parameters are assumed to be verified by the caller.
464 */
465 /* ARGSUSED */
466 int
tcp_opt_set(conn_t * connp,uint_t optset_context,int level,int name,uint_t inlen,uchar_t * invalp,uint_t * outlenp,uchar_t * outvalp,void * thisdg_attrs,cred_t * cr)467 tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
468 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
469 void *thisdg_attrs, cred_t *cr)
470 {
471 tcp_t *tcp = connp->conn_tcp;
472 int *i1 = (int *)invalp;
473 boolean_t onoff = (*i1 == 0) ? 0 : 1;
474 boolean_t checkonly;
475 int reterr;
476 tcp_stack_t *tcps = tcp->tcp_tcps;
477 conn_opt_arg_t coas;
478 uint32_t val = *((uint32_t *)invalp);
479
480 coas.coa_connp = connp;
481 coas.coa_ixa = connp->conn_ixa;
482 coas.coa_ipp = &connp->conn_xmit_ipp;
483 coas.coa_ancillary = B_FALSE;
484 coas.coa_changed = 0;
485
486 switch (optset_context) {
487 case SETFN_OPTCOM_CHECKONLY:
488 checkonly = B_TRUE;
489 /*
490 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
491 * inlen != 0 implies value supplied and
492 * we have to "pretend" to set it.
493 * inlen == 0 implies that there is no
494 * value part in T_CHECK request and just validation
495 * done elsewhere should be enough, we just return here.
496 */
497 if (inlen == 0) {
498 *outlenp = 0;
499 return (0);
500 }
501 break;
502 case SETFN_OPTCOM_NEGOTIATE:
503 checkonly = B_FALSE;
504 break;
505 case SETFN_UD_NEGOTIATE: /* error on conn-oriented transports ? */
506 case SETFN_CONN_NEGOTIATE:
507 checkonly = B_FALSE;
508 /*
509 * Negotiating local and "association-related" options
510 * from other (T_CONN_REQ, T_CONN_RES,T_UNITDATA_REQ)
511 * primitives is allowed by XTI, but we choose
512 * to not implement this style negotiation for Internet
513 * protocols (We interpret it is a must for OSI world but
514 * optional for Internet protocols) for all options.
515 * [ Will do only for the few options that enable test
516 * suites that our XTI implementation of this feature
517 * works for transports that do allow it ]
518 */
519 if (!tcp_allow_connopt_set(level, name)) {
520 *outlenp = 0;
521 return (EINVAL);
522 }
523 break;
524 default:
525 /*
526 * We should never get here
527 */
528 *outlenp = 0;
529 return (EINVAL);
530 }
531
532 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
533 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
534
535 /*
536 * For TCP, we should have no ancillary data sent down
537 * (sendmsg isn't supported for SOCK_STREAM), so thisdg_attrs
538 * has to be zero.
539 */
540 ASSERT(thisdg_attrs == NULL);
541
542 /*
543 * For fixed length options, no sanity check
544 * of passed in length is done. It is assumed *_optcom_req()
545 * routines do the right thing.
546 */
547 switch (level) {
548 case SOL_SOCKET:
549 switch (name) {
550 case SO_KEEPALIVE:
551 if (checkonly) {
552 /* check only case */
553 break;
554 }
555
556 if (!onoff) {
557 if (connp->conn_keepalive) {
558 if (tcp->tcp_ka_tid != 0) {
559 (void) TCP_TIMER_CANCEL(tcp,
560 tcp->tcp_ka_tid);
561 tcp->tcp_ka_tid = 0;
562 }
563 connp->conn_keepalive = 0;
564 }
565 break;
566 }
567 if (!connp->conn_keepalive) {
568 /* Crank up the keepalive timer */
569 tcp->tcp_ka_last_intrvl = 0;
570 tcp->tcp_ka_tid = TCP_TIMER(tcp,
571 tcp_keepalive_timer, tcp->tcp_ka_interval);
572 connp->conn_keepalive = 1;
573 }
574 break;
575 case SO_SNDBUF: {
576 if (*i1 > tcps->tcps_max_buf) {
577 *outlenp = 0;
578 return (ENOBUFS);
579 }
580 if (checkonly)
581 break;
582
583 connp->conn_sndbuf = *i1;
584 if (tcps->tcps_snd_lowat_fraction != 0) {
585 connp->conn_sndlowat = connp->conn_sndbuf /
586 tcps->tcps_snd_lowat_fraction;
587 }
588 (void) tcp_maxpsz_set(tcp, B_TRUE);
589 /*
590 * If we are flow-controlled, recheck the condition.
591 * There are apps that increase SO_SNDBUF size when
592 * flow-controlled (EWOULDBLOCK), and expect the flow
593 * control condition to be lifted right away.
594 */
595 mutex_enter(&tcp->tcp_non_sq_lock);
596 if (tcp->tcp_flow_stopped &&
597 TCP_UNSENT_BYTES(tcp) < connp->conn_sndbuf) {
598 tcp_clrqfull(tcp);
599 }
600 mutex_exit(&tcp->tcp_non_sq_lock);
601 *outlenp = inlen;
602 return (0);
603 }
604 case SO_RCVBUF:
605 if (*i1 > tcps->tcps_max_buf) {
606 *outlenp = 0;
607 return (ENOBUFS);
608 }
609 /* Silently ignore zero */
610 if (!checkonly && *i1 != 0) {
611 *i1 = MSS_ROUNDUP(*i1, tcp->tcp_mss);
612 (void) tcp_rwnd_set(tcp, *i1);
613 }
614 /*
615 * XXX should we return the rwnd here
616 * and tcp_opt_get ?
617 */
618 *outlenp = inlen;
619 return (0);
620 case SO_SND_COPYAVOID:
621 if (!checkonly) {
622 if (tcp->tcp_loopback ||
623 (onoff != 1) || !tcp_zcopy_check(tcp)) {
624 *outlenp = 0;
625 return (EOPNOTSUPP);
626 }
627 tcp->tcp_snd_zcopy_aware = 1;
628 }
629 *outlenp = inlen;
630 return (0);
631 }
632 break;
633 case IPPROTO_TCP:
634 switch (name) {
635 case TCP_NODELAY:
636 if (!checkonly)
637 tcp->tcp_naglim = *i1 ? 1 : tcp->tcp_mss;
638 break;
639 case TCP_NOTIFY_THRESHOLD:
640 if (!checkonly)
641 tcp->tcp_first_timer_threshold = *i1;
642 break;
643 case TCP_ABORT_THRESHOLD:
644 if (!checkonly)
645 tcp->tcp_second_timer_threshold = *i1;
646 break;
647 case TCP_CONN_NOTIFY_THRESHOLD:
648 if (!checkonly)
649 tcp->tcp_first_ctimer_threshold = *i1;
650 break;
651 case TCP_CONN_ABORT_THRESHOLD:
652 if (!checkonly)
653 tcp->tcp_second_ctimer_threshold = *i1;
654 break;
655 case TCP_RECVDSTADDR:
656 if (tcp->tcp_state > TCPS_LISTEN) {
657 *outlenp = 0;
658 return (EOPNOTSUPP);
659 }
660 /* Setting done in conn_opt_set */
661 break;
662 case TCP_INIT_CWND:
663 if (checkonly)
664 break;
665
666 /*
667 * Only allow socket with network configuration
668 * privilege to set the initial cwnd to be larger
669 * than allowed by RFC 3390.
670 */
671 if (val <= MIN(4, MAX(2, 4380 / tcp->tcp_mss))) {
672 tcp->tcp_init_cwnd = val;
673 break;
674 }
675 if ((reterr = secpolicy_ip_config(cr, B_TRUE)) != 0) {
676 *outlenp = 0;
677 return (reterr);
678 }
679 if (val > tcp_max_init_cwnd) {
680 *outlenp = 0;
681 return (EINVAL);
682 }
683 tcp->tcp_init_cwnd = val;
684 break;
685 case TCP_KEEPALIVE_THRESHOLD:
686 if (checkonly)
687 break;
688
689 if (*i1 < tcps->tcps_keepalive_interval_low ||
690 *i1 > tcps->tcps_keepalive_interval_high) {
691 *outlenp = 0;
692 return (EINVAL);
693 }
694 if (*i1 != tcp->tcp_ka_interval) {
695 tcp->tcp_ka_interval = *i1;
696 /*
697 * Check if we need to restart the
698 * keepalive timer.
699 */
700 if (tcp->tcp_ka_tid != 0) {
701 ASSERT(connp->conn_keepalive);
702 (void) TCP_TIMER_CANCEL(tcp,
703 tcp->tcp_ka_tid);
704 tcp->tcp_ka_last_intrvl = 0;
705 tcp->tcp_ka_tid = TCP_TIMER(tcp,
706 tcp_keepalive_timer,
707 tcp->tcp_ka_interval);
708 }
709 }
710 break;
711 case TCP_KEEPALIVE_ABORT_THRESHOLD:
712 if (!checkonly) {
713 if (*i1 <
714 tcps->tcps_keepalive_abort_interval_low ||
715 *i1 >
716 tcps->tcps_keepalive_abort_interval_high) {
717 *outlenp = 0;
718 return (EINVAL);
719 }
720 tcp->tcp_ka_abort_thres = *i1;
721 }
722 break;
723 case TCP_CORK:
724 if (!checkonly) {
725 /*
726 * if tcp->tcp_cork was set and is now
727 * being unset, we have to make sure that
728 * the remaining data gets sent out. Also
729 * unset tcp->tcp_cork so that tcp_wput_data()
730 * can send data even if it is less than mss
731 */
732 if (tcp->tcp_cork && onoff == 0 &&
733 tcp->tcp_unsent > 0) {
734 tcp->tcp_cork = B_FALSE;
735 tcp_wput_data(tcp, NULL, B_FALSE);
736 }
737 tcp->tcp_cork = onoff;
738 }
739 break;
740 case TCP_RTO_INITIAL: {
741 clock_t rto;
742
743 if (checkonly || val == 0)
744 break;
745
746 /*
747 * Sanity checks
748 *
749 * The initial RTO should be bounded by the minimum
750 * and maximum RTO. And it should also be smaller
751 * than the connect attempt abort timeout. Otherwise,
752 * the connection won't be aborted in a period
753 * reasonably close to that timeout.
754 */
755 if (val < tcp->tcp_rto_min || val > tcp->tcp_rto_max ||
756 val > tcp->tcp_second_ctimer_threshold ||
757 val < tcps->tcps_rexmit_interval_initial_low ||
758 val > tcps->tcps_rexmit_interval_initial_high) {
759 *outlenp = 0;
760 return (EINVAL);
761 }
762 tcp->tcp_rto_initial = val;
763
764 /*
765 * If TCP has not sent anything, need to re-calculate
766 * tcp_rto. Otherwise, this option change does not
767 * really affect anything.
768 */
769 if (tcp->tcp_state >= TCPS_SYN_SENT)
770 break;
771
772 tcp->tcp_rtt_sa = tcp->tcp_rto_initial << 2;
773 tcp->tcp_rtt_sd = tcp->tcp_rto_initial >> 1;
774 rto = (tcp->tcp_rtt_sa >> 3) + tcp->tcp_rtt_sd +
775 tcps->tcps_rexmit_interval_extra +
776 (tcp->tcp_rtt_sa >> 5) +
777 tcps->tcps_conn_grace_period;
778 TCP_SET_RTO(tcp, rto);
779 break;
780 }
781 case TCP_RTO_MIN:
782 if (checkonly || val == 0)
783 break;
784
785 if (val < tcps->tcps_rexmit_interval_min_low ||
786 val > tcps->tcps_rexmit_interval_min_high ||
787 val > tcp->tcp_rto_max) {
788 *outlenp = 0;
789 return (EINVAL);
790 }
791 tcp->tcp_rto_min = val;
792 if (tcp->tcp_rto < val)
793 tcp->tcp_rto = val;
794 break;
795 case TCP_RTO_MAX:
796 if (checkonly || val == 0)
797 break;
798
799 /*
800 * Sanity checks
801 *
802 * The maximum RTO should not be larger than the
803 * connection abort timeout. Otherwise, the
804 * connection won't be aborted in a period reasonably
805 * close to that timeout.
806 */
807 if (val < tcps->tcps_rexmit_interval_max_low ||
808 val > tcps->tcps_rexmit_interval_max_high ||
809 val < tcp->tcp_rto_min ||
810 val > tcp->tcp_second_timer_threshold) {
811 *outlenp = 0;
812 return (EINVAL);
813 }
814 tcp->tcp_rto_max = val;
815 if (tcp->tcp_rto > val)
816 tcp->tcp_rto = val;
817 break;
818 case TCP_LINGER2:
819 if (checkonly || *i1 == 0)
820 break;
821
822 /*
823 * Note that the option value's unit is second. And
824 * the value should be bigger than the private
825 * parameter tcp_fin_wait_2_flush_interval's lower
826 * bound and smaller than the current value of that
827 * parameter. It should be smaller than the current
828 * value to avoid an app setting TCP_LINGER2 to a big
829 * value, causing resource to be held up too long in
830 * FIN-WAIT-2 state.
831 */
832 if (*i1 < 0 ||
833 tcps->tcps_fin_wait_2_flush_interval_low/SECONDS >
834 *i1 ||
835 tcps->tcps_fin_wait_2_flush_interval/SECONDS <
836 *i1) {
837 *outlenp = 0;
838 return (EINVAL);
839 }
840 tcp->tcp_fin_wait_2_flush_interval = *i1 * SECONDS;
841 break;
842 default:
843 break;
844 }
845 break;
846 case IPPROTO_IP:
847 if (connp->conn_family != AF_INET) {
848 *outlenp = 0;
849 return (EINVAL);
850 }
851 switch (name) {
852 case IP_SEC_OPT:
853 /*
854 * We should not allow policy setting after
855 * we start listening for connections.
856 */
857 if (tcp->tcp_state == TCPS_LISTEN) {
858 return (EINVAL);
859 }
860 break;
861 }
862 break;
863 case IPPROTO_IPV6:
864 /*
865 * IPPROTO_IPV6 options are only supported for sockets
866 * that are using IPv6 on the wire.
867 */
868 if (connp->conn_ipversion != IPV6_VERSION) {
869 *outlenp = 0;
870 return (EINVAL);
871 }
872
873 switch (name) {
874 case IPV6_RECVPKTINFO:
875 if (!checkonly) {
876 /* Force it to be sent up with the next msg */
877 tcp->tcp_recvifindex = 0;
878 }
879 break;
880 case IPV6_RECVTCLASS:
881 if (!checkonly) {
882 /* Force it to be sent up with the next msg */
883 tcp->tcp_recvtclass = 0xffffffffU;
884 }
885 break;
886 case IPV6_RECVHOPLIMIT:
887 if (!checkonly) {
888 /* Force it to be sent up with the next msg */
889 tcp->tcp_recvhops = 0xffffffffU;
890 }
891 break;
892 case IPV6_PKTINFO:
893 /* This is an extra check for TCP */
894 if (inlen == sizeof (struct in6_pktinfo)) {
895 struct in6_pktinfo *pkti;
896
897 pkti = (struct in6_pktinfo *)invalp;
898 /*
899 * RFC 3542 states that ipi6_addr must be
900 * the unspecified address when setting the
901 * IPV6_PKTINFO sticky socket option on a
902 * TCP socket.
903 */
904 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr))
905 return (EINVAL);
906 }
907 break;
908 case IPV6_SEC_OPT:
909 /*
910 * We should not allow policy setting after
911 * we start listening for connections.
912 */
913 if (tcp->tcp_state == TCPS_LISTEN) {
914 return (EINVAL);
915 }
916 break;
917 }
918 break;
919 }
920 reterr = conn_opt_set(&coas, level, name, inlen, invalp,
921 checkonly, cr);
922 if (reterr != 0) {
923 *outlenp = 0;
924 return (reterr);
925 }
926
927 /*
928 * Common case of OK return with outval same as inval
929 */
930 if (invalp != outvalp) {
931 /* don't trust bcopy for identical src/dst */
932 (void) bcopy(invalp, outvalp, inlen);
933 }
934 *outlenp = inlen;
935
936 if (coas.coa_changed & COA_HEADER_CHANGED) {
937 /* If we are connected we rebuilt the headers */
938 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
939 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
940 reterr = tcp_build_hdrs(tcp);
941 if (reterr != 0)
942 return (reterr);
943 }
944 }
945 if (coas.coa_changed & COA_ROUTE_CHANGED) {
946 in6_addr_t nexthop;
947
948 /*
949 * If we are connected we re-cache the information.
950 * We ignore errors to preserve BSD behavior.
951 * Note that we don't redo IPsec policy lookup here
952 * since the final destination (or source) didn't change.
953 */
954 ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
955 &connp->conn_faddr_v6, &nexthop);
956
957 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
958 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
959 (void) ip_attr_connect(connp, connp->conn_ixa,
960 &connp->conn_laddr_v6, &connp->conn_faddr_v6,
961 &nexthop, connp->conn_fport, NULL, NULL,
962 IPDF_VERIFY_DST);
963 }
964 }
965 if ((coas.coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
966 connp->conn_wq->q_hiwat = connp->conn_sndbuf;
967 }
968 if (coas.coa_changed & COA_WROFF_CHANGED) {
969 connp->conn_wroff = connp->conn_ht_iphc_allocated +
970 tcps->tcps_wroff_xtra;
971 (void) proto_set_tx_wroff(connp->conn_rq, connp,
972 connp->conn_wroff);
973 }
974 if (coas.coa_changed & COA_OOBINLINE_CHANGED) {
975 if (IPCL_IS_NONSTR(connp))
976 proto_set_rx_oob_opt(connp, onoff);
977 }
978 return (0);
979 }
980