1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 #include <sys/types.h>
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/sysmacros.h>
30 #include <sys/debug.h>
31 #include <sys/cmn_err.h>
32
33 #include <sys/stropts.h>
34 #include <sys/socket.h>
35 #include <sys/socketvar.h>
36
37 #define _SUN_TPI_VERSION 2
38 #include <sys/tihdr.h>
39 #include <sys/sockio.h>
40 #include <sys/kmem_impl.h>
41
42 #include <sys/strsubr.h>
43 #include <sys/strsun.h>
44 #include <sys/ddi.h>
45 #include <netinet/in.h>
46 #include <inet/ip.h>
47
48 #include <fs/sockfs/sockcommon.h>
49 #include <fs/sockfs/sockfilter_impl.h>
50
51 #include <sys/socket_proto.h>
52
53 #include <fs/sockfs/socktpi_impl.h>
54 #include <fs/sockfs/sodirect.h>
55 #include <sys/tihdr.h>
56 #include <fs/sockfs/nl7c.h>
57
58 extern int xnet_skip_checks;
59 extern int xnet_check_print;
60
61 static void so_queue_oob(struct sonode *, mblk_t *, size_t);
62
63
/*
 * Stub accept operation: every argument is ignored and EOPNOTSUPP is
 * returned unconditionally.
 */
/*ARGSUSED*/
int
so_accept_notsupp(struct sonode *lso, int fflag, struct cred *cr,
    struct sonode **nsop)
{
	return (EOPNOTSUPP);
}
71
/*
 * Stub listen operation: the socket, backlog and credentials are ignored
 * and EOPNOTSUPP is returned unconditionally.
 */
/*ARGSUSED*/
int
so_listen_notsupp(struct sonode *so, int backlog, struct cred *cr)
{
	return (EOPNOTSUPP);
}
78
/*
 * Stub getsockname operation: nothing is written to the address or length
 * arguments and EOPNOTSUPP is returned unconditionally.
 */
/*ARGSUSED*/
int
so_getsockname_notsupp(struct sonode *so, struct sockaddr *sa, socklen_t *len,
    struct cred *cr)
{
	return (EOPNOTSUPP);
}
86
87 /*ARGSUSED*/
88 int
so_getpeername_notsupp(struct sonode * so,struct sockaddr * addr,socklen_t * addrlen,boolean_t accept,struct cred * cr)89 so_getpeername_notsupp(struct sonode *so, struct sockaddr *addr,
90 socklen_t *addrlen, boolean_t accept, struct cred *cr)
91 {
92 return (EOPNOTSUPP);
93 }
94
/*
 * Stub shutdown operation: the socket, direction and credentials are
 * ignored and EOPNOTSUPP is returned unconditionally.
 */
/*ARGSUSED*/
int
so_shutdown_notsupp(struct sonode *so, int how, struct cred *cr)
{
	return (EOPNOTSUPP);
}
101
102 /*ARGSUSED*/
103 int
so_sendmblk_notsupp(struct sonode * so,struct msghdr * msg,int fflag,struct cred * cr,mblk_t ** mpp)104 so_sendmblk_notsupp(struct sonode *so, struct msghdr *msg, int fflag,
105 struct cred *cr, mblk_t **mpp)
106 {
107 return (EOPNOTSUPP);
108 }
109
110 /*
111 * Generic Socket Ops
112 */
113
/*
 * Generic socket initialisation.  The socket, its parent socket, the flags
 * and the credentials are handed to socket_init_common(), whose result is
 * returned unchanged.
 */
/* ARGSUSED */
int
so_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags)
{
	int rc;

	rc = socket_init_common(so, pso, flags, cr);
	return (rc);
}
120
121 int
so_bind(struct sonode * so,struct sockaddr * name,socklen_t namelen,int flags,struct cred * cr)122 so_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
123 int flags, struct cred *cr)
124 {
125 int error;
126
127 SO_BLOCK_FALLBACK(so, SOP_BIND(so, name, namelen, flags, cr));
128
129 ASSERT(flags == _SOBIND_XPG4_2 || flags == _SOBIND_SOCKBSD);
130
131 /* X/Open requires this check */
132 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
133 if (xnet_check_print) {
134 printf("sockfs: X/Open bind state check "
135 "caused EINVAL\n");
136 }
137 error = EINVAL;
138 goto done;
139 }
140
141 /*
142 * a bind to a NULL address is interpreted as unbind. So just
143 * do the downcall.
144 */
145 if (name == NULL)
146 goto dobind;
147
148 switch (so->so_family) {
149 case AF_INET:
150 if ((size_t)namelen != sizeof (sin_t)) {
151 error = name->sa_family != so->so_family ?
152 EAFNOSUPPORT : EINVAL;
153 eprintsoline(so, error);
154 goto done;
155 }
156
157 if ((flags & _SOBIND_XPG4_2) &&
158 (name->sa_family != so->so_family)) {
159 /*
160 * This check has to be made for X/Open
161 * sockets however application failures have
162 * been observed when it is applied to
163 * all sockets.
164 */
165 error = EAFNOSUPPORT;
166 eprintsoline(so, error);
167 goto done;
168 }
169 /*
170 * Force a zero sa_family to match so_family.
171 *
172 * Some programs like inetd(1M) don't set the
173 * family field. Other programs leave
174 * sin_family set to garbage - SunOS 4.X does
175 * not check the family field on a bind.
176 * We use the family field that
177 * was passed in to the socket() call.
178 */
179 name->sa_family = so->so_family;
180 break;
181
182 case AF_INET6: {
183 #ifdef DEBUG
184 sin6_t *sin6 = (sin6_t *)name;
185 #endif
186 if ((size_t)namelen != sizeof (sin6_t)) {
187 error = name->sa_family != so->so_family ?
188 EAFNOSUPPORT : EINVAL;
189 eprintsoline(so, error);
190 goto done;
191 }
192
193 if (name->sa_family != so->so_family) {
194 /*
195 * With IPv6 we require the family to match
196 * unlike in IPv4.
197 */
198 error = EAFNOSUPPORT;
199 eprintsoline(so, error);
200 goto done;
201 }
202 #ifdef DEBUG
203 /*
204 * Verify that apps don't forget to clear
205 * sin6_scope_id etc
206 */
207 if (sin6->sin6_scope_id != 0 &&
208 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
209 zcmn_err(getzoneid(), CE_WARN,
210 "bind with uninitialized sin6_scope_id "
211 "(%d) on socket. Pid = %d\n",
212 (int)sin6->sin6_scope_id,
213 (int)curproc->p_pid);
214 }
215 if (sin6->__sin6_src_id != 0) {
216 zcmn_err(getzoneid(), CE_WARN,
217 "bind with uninitialized __sin6_src_id "
218 "(%d) on socket. Pid = %d\n",
219 (int)sin6->__sin6_src_id,
220 (int)curproc->p_pid);
221 }
222 #endif /* DEBUG */
223
224 break;
225 }
226 default:
227 /* Just pass the request to the protocol */
228 goto dobind;
229 }
230
231 /*
232 * First we check if either NCA or KSSL has been enabled for
233 * the requested address, and if so, we fall back to TPI.
234 * If neither of those two services are enabled, then we just
235 * pass the request to the protocol.
236 *
237 * Note that KSSL can only be enabled on a socket if NCA is NOT
238 * enabled for that socket, hence the else-statement below.
239 */
240 if (nl7c_enabled && ((so->so_family == AF_INET ||
241 so->so_family == AF_INET6) &&
242 nl7c_lookup_addr(name, namelen) != NULL)) {
243 /*
244 * NL7C is not supported in non-global zones,
245 * we enforce this restriction here.
246 */
247 if (so->so_zoneid == GLOBAL_ZONEID) {
248 /* NCA should be used, so fall back to TPI */
249 error = so_tpi_fallback(so, cr);
250 SO_UNBLOCK_FALLBACK(so);
251 if (error)
252 return (error);
253 else
254 return (SOP_BIND(so, name, namelen, flags, cr));
255 }
256 }
257
258 dobind:
259 if (so->so_filter_active == 0 ||
260 (error = sof_filter_bind(so, name, &namelen, cr)) < 0) {
261 error = (*so->so_downcalls->sd_bind)
262 (so->so_proto_handle, name, namelen, cr);
263 }
264 done:
265 SO_UNBLOCK_FALLBACK(so);
266
267 return (error);
268 }
269
270 int
so_listen(struct sonode * so,int backlog,struct cred * cr)271 so_listen(struct sonode *so, int backlog, struct cred *cr)
272 {
273 int error = 0;
274
275 ASSERT(MUTEX_NOT_HELD(&so->so_lock));
276 SO_BLOCK_FALLBACK(so, SOP_LISTEN(so, backlog, cr));
277
278 if ((so)->so_filter_active == 0 ||
279 (error = sof_filter_listen(so, &backlog, cr)) < 0)
280 error = (*so->so_downcalls->sd_listen)(so->so_proto_handle,
281 backlog, cr);
282
283 SO_UNBLOCK_FALLBACK(so);
284
285 return (error);
286 }
287
288
289 int
so_connect(struct sonode * so,struct sockaddr * name,socklen_t namelen,int fflag,int flags,struct cred * cr)290 so_connect(struct sonode *so, struct sockaddr *name,
291 socklen_t namelen, int fflag, int flags, struct cred *cr)
292 {
293 int error = 0;
294 sock_connid_t id;
295
296 ASSERT(MUTEX_NOT_HELD(&so->so_lock));
297 SO_BLOCK_FALLBACK(so, SOP_CONNECT(so, name, namelen, fflag, flags, cr));
298
299 /*
300 * If there is a pending error, return error
301 * This can happen if a non blocking operation caused an error.
302 */
303
304 if (so->so_error != 0) {
305 mutex_enter(&so->so_lock);
306 error = sogeterr(so, B_TRUE);
307 mutex_exit(&so->so_lock);
308 if (error != 0)
309 goto done;
310 }
311
312 if (so->so_filter_active == 0 ||
313 (error = sof_filter_connect(so, (struct sockaddr *)name,
314 &namelen, cr)) < 0) {
315 error = (*so->so_downcalls->sd_connect)(so->so_proto_handle,
316 name, namelen, &id, cr);
317
318 if (error == EINPROGRESS)
319 error = so_wait_connected(so,
320 fflag & (FNONBLOCK|FNDELAY), id);
321 }
322 done:
323 SO_UNBLOCK_FALLBACK(so);
324 return (error);
325 }
326
327 /*ARGSUSED*/
328 int
so_accept(struct sonode * so,int fflag,struct cred * cr,struct sonode ** nsop)329 so_accept(struct sonode *so, int fflag, struct cred *cr, struct sonode **nsop)
330 {
331 int error = 0;
332 struct sonode *nso;
333
334 *nsop = NULL;
335
336 SO_BLOCK_FALLBACK(so, SOP_ACCEPT(so, fflag, cr, nsop));
337 if ((so->so_state & SS_ACCEPTCONN) == 0) {
338 SO_UNBLOCK_FALLBACK(so);
339 return ((so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) ?
340 EOPNOTSUPP : EINVAL);
341 }
342
343 if ((error = so_acceptq_dequeue(so, (fflag & (FNONBLOCK|FNDELAY)),
344 &nso)) == 0) {
345 ASSERT(nso != NULL);
346
347 /* finish the accept */
348 if ((so->so_filter_active > 0 &&
349 (error = sof_filter_accept(nso, cr)) > 0) ||
350 (error = (*so->so_downcalls->sd_accept)(so->so_proto_handle,
351 nso->so_proto_handle, (sock_upper_handle_t)nso, cr)) != 0) {
352 (void) socket_close(nso, 0, cr);
353 socket_destroy(nso);
354 } else {
355 *nsop = nso;
356 }
357 }
358
359 SO_UNBLOCK_FALLBACK(so);
360 return (error);
361 }
362
363 int
so_sendmsg(struct sonode * so,struct nmsghdr * msg,struct uio * uiop,struct cred * cr)364 so_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
365 struct cred *cr)
366 {
367 int error, flags;
368 boolean_t dontblock;
369 ssize_t orig_resid;
370 mblk_t *mp;
371
372 SO_BLOCK_FALLBACK(so, SOP_SENDMSG(so, msg, uiop, cr));
373
374 flags = msg->msg_flags;
375 error = 0;
376 dontblock = (flags & MSG_DONTWAIT) ||
377 (uiop->uio_fmode & (FNONBLOCK|FNDELAY));
378
379 if (!(flags & MSG_XPG4_2) && msg->msg_controllen != 0) {
380 /*
381 * Old way of passing fd's is not supported
382 */
383 SO_UNBLOCK_FALLBACK(so);
384 return (EOPNOTSUPP);
385 }
386
387 if ((so->so_mode & SM_ATOMIC) &&
388 uiop->uio_resid > so->so_proto_props.sopp_maxpsz &&
389 so->so_proto_props.sopp_maxpsz != -1) {
390 SO_UNBLOCK_FALLBACK(so);
391 return (EMSGSIZE);
392 }
393
394 /*
395 * For atomic sends we will only do one iteration.
396 */
397 do {
398 if (so->so_state & SS_CANTSENDMORE) {
399 error = EPIPE;
400 break;
401 }
402
403 if (so->so_error != 0) {
404 mutex_enter(&so->so_lock);
405 error = sogeterr(so, B_TRUE);
406 mutex_exit(&so->so_lock);
407 if (error != 0)
408 break;
409 }
410
411 /*
412 * Send down OOB messages even if the send path is being
413 * flow controlled (assuming the protocol supports OOB data).
414 */
415 if (flags & MSG_OOB) {
416 if ((so->so_mode & SM_EXDATA) == 0) {
417 error = EOPNOTSUPP;
418 break;
419 }
420 } else if (SO_SND_FLOWCTRLD(so)) {
421 /*
422 * Need to wait until the protocol is ready to receive
423 * more data for transmission.
424 */
425 if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
426 break;
427 }
428
429 /*
430 * Time to send data to the protocol. We either copy the
431 * data into mblks or pass the uio directly to the protocol.
432 * We decide what to do based on the available down calls.
433 */
434 if (so->so_downcalls->sd_send_uio != NULL) {
435 error = (*so->so_downcalls->sd_send_uio)
436 (so->so_proto_handle, uiop, msg, cr);
437 if (error != 0)
438 break;
439 } else {
440 /* save the resid in case of failure */
441 orig_resid = uiop->uio_resid;
442
443 if ((mp = socopyinuio(uiop,
444 so->so_proto_props.sopp_maxpsz,
445 so->so_proto_props.sopp_wroff,
446 so->so_proto_props.sopp_maxblk,
447 so->so_proto_props.sopp_tail, &error)) == NULL) {
448 break;
449 }
450 ASSERT(uiop->uio_resid >= 0);
451
452 if (so->so_filter_active > 0 &&
453 ((mp = SOF_FILTER_DATA_OUT(so, mp, msg, cr,
454 &error)) == NULL)) {
455 if (error != 0)
456 break;
457 continue;
458 }
459 error = (*so->so_downcalls->sd_send)
460 (so->so_proto_handle, mp, msg, cr);
461 if (error != 0) {
462 /*
463 * The send failed. We do not have to free the
464 * mblks, because that is the protocol's
465 * responsibility. However, uio_resid must
466 * remain accurate, so adjust that here.
467 */
468 uiop->uio_resid = orig_resid;
469 break;
470 }
471 }
472 } while (uiop->uio_resid > 0);
473
474 SO_UNBLOCK_FALLBACK(so);
475
476 return (error);
477 }
478
479 int
so_sendmblk_impl(struct sonode * so,struct nmsghdr * msg,int fflag,struct cred * cr,mblk_t ** mpp,sof_instance_t * fil,boolean_t fil_inject)480 so_sendmblk_impl(struct sonode *so, struct nmsghdr *msg, int fflag,
481 struct cred *cr, mblk_t **mpp, sof_instance_t *fil,
482 boolean_t fil_inject)
483 {
484 int error;
485 boolean_t dontblock;
486 size_t size;
487 mblk_t *mp = *mpp;
488
489 if (so->so_downcalls->sd_send == NULL)
490 return (EOPNOTSUPP);
491
492 error = 0;
493 dontblock = (msg->msg_flags & MSG_DONTWAIT) ||
494 (fflag & (FNONBLOCK|FNDELAY));
495 size = msgdsize(mp);
496
497 if ((so->so_mode & SM_ATOMIC) &&
498 size > so->so_proto_props.sopp_maxpsz &&
499 so->so_proto_props.sopp_maxpsz != -1) {
500 SO_UNBLOCK_FALLBACK(so);
501 return (EMSGSIZE);
502 }
503
504 while (mp != NULL) {
505 mblk_t *nmp, *last_mblk;
506 size_t mlen;
507
508 if (so->so_state & SS_CANTSENDMORE) {
509 error = EPIPE;
510 break;
511 }
512 if (so->so_error != 0) {
513 mutex_enter(&so->so_lock);
514 error = sogeterr(so, B_TRUE);
515 mutex_exit(&so->so_lock);
516 if (error != 0)
517 break;
518 }
519 /* Socket filters are not flow controlled */
520 if (SO_SND_FLOWCTRLD(so) && !fil_inject) {
521 /*
522 * Need to wait until the protocol is ready to receive
523 * more data for transmission.
524 */
525 if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
526 break;
527 }
528
529 /*
530 * We only allow so_maxpsz of data to be sent down to
531 * the protocol at time.
532 */
533 mlen = MBLKL(mp);
534 nmp = mp->b_cont;
535 last_mblk = mp;
536 while (nmp != NULL) {
537 mlen += MBLKL(nmp);
538 if (mlen > so->so_proto_props.sopp_maxpsz) {
539 last_mblk->b_cont = NULL;
540 break;
541 }
542 last_mblk = nmp;
543 nmp = nmp->b_cont;
544 }
545
546 if (so->so_filter_active > 0 &&
547 (mp = SOF_FILTER_DATA_OUT_FROM(so, fil, mp, msg,
548 cr, &error)) == NULL) {
549 *mpp = mp = nmp;
550 if (error != 0)
551 break;
552 continue;
553 }
554 error = (*so->so_downcalls->sd_send)
555 (so->so_proto_handle, mp, msg, cr);
556 if (error != 0) {
557 /*
558 * The send failed. The protocol will free the mblks
559 * that were sent down. Let the caller deal with the
560 * rest.
561 */
562 *mpp = nmp;
563 break;
564 }
565
566 *mpp = mp = nmp;
567 }
568 /* Let the filter know whether the protocol is flow controlled */
569 if (fil_inject && error == 0 && SO_SND_FLOWCTRLD(so))
570 error = ENOSPC;
571
572 return (error);
573 }
574
575 #pragma inline(so_sendmblk_impl)
576
577 int
so_sendmblk(struct sonode * so,struct nmsghdr * msg,int fflag,struct cred * cr,mblk_t ** mpp)578 so_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
579 struct cred *cr, mblk_t **mpp)
580 {
581 int error;
582
583 SO_BLOCK_FALLBACK(so, SOP_SENDMBLK(so, msg, fflag, cr, mpp));
584
585 if ((so->so_mode & SM_SENDFILESUPP) == 0) {
586 SO_UNBLOCK_FALLBACK(so);
587 return (EOPNOTSUPP);
588 }
589
590 error = so_sendmblk_impl(so, msg, fflag, cr, mpp, so->so_filter_top,
591 B_FALSE);
592
593 SO_UNBLOCK_FALLBACK(so);
594
595 return (error);
596 }
597
598 int
so_shutdown(struct sonode * so,int how,struct cred * cr)599 so_shutdown(struct sonode *so, int how, struct cred *cr)
600 {
601 int error;
602
603 SO_BLOCK_FALLBACK(so, SOP_SHUTDOWN(so, how, cr));
604
605 /*
606 * SunOS 4.X has no check for datagram sockets.
607 * 5.X checks that it is connected (ENOTCONN)
608 * X/Open requires that we check the connected state.
609 */
610 if (!(so->so_state & SS_ISCONNECTED)) {
611 if (!xnet_skip_checks) {
612 error = ENOTCONN;
613 if (xnet_check_print) {
614 printf("sockfs: X/Open shutdown check "
615 "caused ENOTCONN\n");
616 }
617 }
618 goto done;
619 }
620
621 if (so->so_filter_active == 0 ||
622 (error = sof_filter_shutdown(so, &how, cr)) < 0)
623 error = ((*so->so_downcalls->sd_shutdown)(so->so_proto_handle,
624 how, cr));
625
626 /*
627 * Protocol agreed to shutdown. We need to flush the
628 * receive buffer if the receive side is being shutdown.
629 */
630 if (error == 0 && how != SHUT_WR) {
631 mutex_enter(&so->so_lock);
632 /* wait for active reader to finish */
633 (void) so_lock_read(so, 0);
634
635 so_rcv_flush(so);
636
637 so_unlock_read(so);
638 mutex_exit(&so->so_lock);
639 }
640
641 done:
642 SO_UNBLOCK_FALLBACK(so);
643 return (error);
644 }
645
646 int
so_getsockname(struct sonode * so,struct sockaddr * addr,socklen_t * addrlen,struct cred * cr)647 so_getsockname(struct sonode *so, struct sockaddr *addr,
648 socklen_t *addrlen, struct cred *cr)
649 {
650 int error;
651
652 SO_BLOCK_FALLBACK(so, SOP_GETSOCKNAME(so, addr, addrlen, cr));
653
654 if (so->so_filter_active == 0 ||
655 (error = sof_filter_getsockname(so, addr, addrlen, cr)) < 0)
656 error = (*so->so_downcalls->sd_getsockname)
657 (so->so_proto_handle, addr, addrlen, cr);
658
659 SO_UNBLOCK_FALLBACK(so);
660 return (error);
661 }
662
663 int
so_getpeername(struct sonode * so,struct sockaddr * addr,socklen_t * addrlen,boolean_t accept,struct cred * cr)664 so_getpeername(struct sonode *so, struct sockaddr *addr,
665 socklen_t *addrlen, boolean_t accept, struct cred *cr)
666 {
667 int error;
668
669 SO_BLOCK_FALLBACK(so, SOP_GETPEERNAME(so, addr, addrlen, accept, cr));
670
671 if (accept) {
672 error = (*so->so_downcalls->sd_getpeername)
673 (so->so_proto_handle, addr, addrlen, cr);
674 } else if (!(so->so_state & SS_ISCONNECTED)) {
675 error = ENOTCONN;
676 } else if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
677 /* Added this check for X/Open */
678 error = EINVAL;
679 if (xnet_check_print) {
680 printf("sockfs: X/Open getpeername check => EINVAL\n");
681 }
682 } else if (so->so_filter_active == 0 ||
683 (error = sof_filter_getpeername(so, addr, addrlen, cr)) < 0) {
684 error = (*so->so_downcalls->sd_getpeername)
685 (so->so_proto_handle, addr, addrlen, cr);
686 }
687
688 SO_UNBLOCK_FALLBACK(so);
689 return (error);
690 }
691
692 int
so_getsockopt(struct sonode * so,int level,int option_name,void * optval,socklen_t * optlenp,int flags,struct cred * cr)693 so_getsockopt(struct sonode *so, int level, int option_name,
694 void *optval, socklen_t *optlenp, int flags, struct cred *cr)
695 {
696 int error = 0;
697
698 if (level == SOL_FILTER)
699 return (sof_getsockopt(so, option_name, optval, optlenp, cr));
700
701 SO_BLOCK_FALLBACK(so,
702 SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, flags, cr));
703
704 if ((so->so_filter_active == 0 ||
705 (error = sof_filter_getsockopt(so, level, option_name, optval,
706 optlenp, cr)) < 0) &&
707 (error = socket_getopt_common(so, level, option_name, optval,
708 optlenp, flags)) < 0) {
709 error = (*so->so_downcalls->sd_getsockopt)
710 (so->so_proto_handle, level, option_name, optval, optlenp,
711 cr);
712 if (error == ENOPROTOOPT) {
713 if (level == SOL_SOCKET) {
714 /*
715 * If a protocol does not support a particular
716 * socket option, set can fail (not allowed)
717 * but get can not fail. This is the previous
718 * sockfs bahvior.
719 */
720 switch (option_name) {
721 case SO_LINGER:
722 if (*optlenp < (t_uscalar_t)
723 sizeof (struct linger)) {
724 error = EINVAL;
725 break;
726 }
727 error = 0;
728 bzero(optval, sizeof (struct linger));
729 *optlenp = sizeof (struct linger);
730 break;
731 case SO_RCVTIMEO:
732 case SO_SNDTIMEO:
733 if (*optlenp < (t_uscalar_t)
734 sizeof (struct timeval)) {
735 error = EINVAL;
736 break;
737 }
738 error = 0;
739 bzero(optval, sizeof (struct timeval));
740 *optlenp = sizeof (struct timeval);
741 break;
742 case SO_SND_BUFINFO:
743 if (*optlenp < (t_uscalar_t)
744 sizeof (struct so_snd_bufinfo)) {
745 error = EINVAL;
746 break;
747 }
748 error = 0;
749 bzero(optval,
750 sizeof (struct so_snd_bufinfo));
751 *optlenp =
752 sizeof (struct so_snd_bufinfo);
753 break;
754 case SO_DEBUG:
755 case SO_REUSEADDR:
756 case SO_KEEPALIVE:
757 case SO_DONTROUTE:
758 case SO_BROADCAST:
759 case SO_USELOOPBACK:
760 case SO_OOBINLINE:
761 case SO_DGRAM_ERRIND:
762 case SO_SNDBUF:
763 case SO_RCVBUF:
764 error = 0;
765 *((int32_t *)optval) = 0;
766 *optlenp = sizeof (int32_t);
767 break;
768 default:
769 break;
770 }
771 }
772 }
773 }
774
775 SO_UNBLOCK_FALLBACK(so);
776 return (error);
777 }
778
779 int
so_setsockopt(struct sonode * so,int level,int option_name,const void * optval,socklen_t optlen,struct cred * cr)780 so_setsockopt(struct sonode *so, int level, int option_name,
781 const void *optval, socklen_t optlen, struct cred *cr)
782 {
783 int error = 0;
784 struct timeval tl;
785 const void *opt = optval;
786
787 if (level == SOL_FILTER)
788 return (sof_setsockopt(so, option_name, optval, optlen, cr));
789
790 SO_BLOCK_FALLBACK(so,
791 SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr));
792
793 /* X/Open requires this check */
794 if (so->so_state & SS_CANTSENDMORE && !xnet_skip_checks) {
795 SO_UNBLOCK_FALLBACK(so);
796 if (xnet_check_print)
797 printf("sockfs: X/Open setsockopt check => EINVAL\n");
798 return (EINVAL);
799 }
800
801 if (so->so_filter_active > 0 &&
802 (error = sof_filter_setsockopt(so, level, option_name,
803 (void *)optval, &optlen, cr)) >= 0)
804 goto done;
805
806 if (level == SOL_SOCKET) {
807 switch (option_name) {
808 case SO_RCVTIMEO:
809 case SO_SNDTIMEO: {
810 /*
811 * We pass down these two options to protocol in order
812 * to support some third part protocols which need to
813 * know them. For those protocols which don't care
814 * these two options, simply return 0.
815 */
816 clock_t t_usec;
817
818 if (get_udatamodel() == DATAMODEL_NONE ||
819 get_udatamodel() == DATAMODEL_NATIVE) {
820 if (optlen != sizeof (struct timeval)) {
821 error = EINVAL;
822 goto done;
823 }
824 bcopy((struct timeval *)optval, &tl,
825 sizeof (struct timeval));
826 } else {
827 if (optlen != sizeof (struct timeval32)) {
828 error = EINVAL;
829 goto done;
830 }
831 TIMEVAL32_TO_TIMEVAL(&tl,
832 (struct timeval32 *)optval);
833 }
834 opt = &tl;
835 optlen = sizeof (tl);
836 t_usec = tl.tv_sec * 1000 * 1000 + tl.tv_usec;
837 mutex_enter(&so->so_lock);
838 if (option_name == SO_RCVTIMEO)
839 so->so_rcvtimeo = drv_usectohz(t_usec);
840 else
841 so->so_sndtimeo = drv_usectohz(t_usec);
842 mutex_exit(&so->so_lock);
843 break;
844 }
845 case SO_RCVBUF:
846 /*
847 * XXX XPG 4.2 applications retrieve SO_RCVBUF from
848 * sockfs since the transport might adjust the value
849 * and not return exactly what was set by the
850 * application.
851 */
852 so->so_xpg_rcvbuf = *(int32_t *)optval;
853 break;
854 }
855 }
856 error = (*so->so_downcalls->sd_setsockopt)
857 (so->so_proto_handle, level, option_name, opt, optlen, cr);
858 done:
859 SO_UNBLOCK_FALLBACK(so);
860 return (error);
861 }
862
863 int
so_ioctl(struct sonode * so,int cmd,intptr_t arg,int mode,struct cred * cr,int32_t * rvalp)864 so_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
865 struct cred *cr, int32_t *rvalp)
866 {
867 int error = 0;
868
869 SO_BLOCK_FALLBACK(so, SOP_IOCTL(so, cmd, arg, mode, cr, rvalp));
870
871 /*
872 * If there is a pending error, return error
873 * This can happen if a non blocking operation caused an error.
874 */
875 if (so->so_error != 0) {
876 mutex_enter(&so->so_lock);
877 error = sogeterr(so, B_TRUE);
878 mutex_exit(&so->so_lock);
879 if (error != 0)
880 goto done;
881 }
882
883 /*
884 * calling strioc can result in the socket falling back to TPI,
885 * if that is supported.
886 */
887 if ((so->so_filter_active == 0 ||
888 (error = sof_filter_ioctl(so, cmd, arg, mode,
889 rvalp, cr)) < 0) &&
890 (error = socket_ioctl_common(so, cmd, arg, mode, cr, rvalp)) < 0 &&
891 (error = socket_strioc_common(so, cmd, arg, mode, cr, rvalp)) < 0) {
892 error = (*so->so_downcalls->sd_ioctl)(so->so_proto_handle,
893 cmd, arg, mode, rvalp, cr);
894 }
895
896 done:
897 SO_UNBLOCK_FALLBACK(so);
898
899 return (error);
900 }
901
902 int
so_poll(struct sonode * so,short events,int anyyet,short * reventsp,struct pollhead ** phpp)903 so_poll(struct sonode *so, short events, int anyyet, short *reventsp,
904 struct pollhead **phpp)
905 {
906 int state = so->so_state;
907 *reventsp = 0;
908
909 /*
910 * In sockets the errors are represented as input/output events
911 */
912 if (so->so_error != 0 &&
913 ((POLLIN|POLLRDNORM|POLLOUT) & events) != 0) {
914 *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & events;
915 return (0);
916 }
917
918 /*
919 * If the socket is in a state where it can send data
920 * turn on POLLWRBAND and POLLOUT events.
921 */
922 if ((so->so_mode & SM_CONNREQUIRED) == 0 || (state & SS_ISCONNECTED)) {
923 /*
924 * out of band data is allowed even if the connection
925 * is flow controlled
926 */
927 *reventsp |= POLLWRBAND & events;
928 if (!SO_SND_FLOWCTRLD(so)) {
929 /*
930 * As long as there is buffer to send data
931 * turn on POLLOUT events
932 */
933 *reventsp |= POLLOUT & events;
934 }
935 }
936
937 /*
938 * Turn on POLLIN whenever there is data on the receive queue,
939 * or the socket is in a state where no more data will be received.
940 * Also, if the socket is accepting connections, flip the bit if
941 * there is something on the queue.
942 *
943 * We do an initial check for events without holding locks. However,
944 * if there are no event available, then we redo the check for POLLIN
945 * events under the lock.
946 */
947
948 /* Pending connections */
949 if (!list_is_empty(&so->so_acceptq_list))
950 *reventsp |= (POLLIN|POLLRDNORM) & events;
951
952 /* Data */
953 /* so_downcalls is null for sctp */
954 if (so->so_downcalls != NULL && so->so_downcalls->sd_poll != NULL) {
955 *reventsp |= (*so->so_downcalls->sd_poll)
956 (so->so_proto_handle, events & SO_PROTO_POLLEV, anyyet,
957 CRED()) & events;
958 ASSERT((*reventsp & ~events) == 0);
959 /* do not recheck events */
960 events &= ~SO_PROTO_POLLEV;
961 } else {
962 if (SO_HAVE_DATA(so))
963 *reventsp |= (POLLIN|POLLRDNORM) & events;
964
965 /* Urgent data */
966 if ((state & SS_OOBPEND) != 0) {
967 *reventsp |= (POLLRDBAND | POLLPRI) & events;
968 }
969 }
970
971 if (!*reventsp && !anyyet) {
972 /* Check for read events again, but this time under lock */
973 if (events & (POLLIN|POLLRDNORM)) {
974 mutex_enter(&so->so_lock);
975 if (SO_HAVE_DATA(so) ||
976 !list_is_empty(&so->so_acceptq_list)) {
977 mutex_exit(&so->so_lock);
978 *reventsp |= (POLLIN|POLLRDNORM) & events;
979 return (0);
980 } else {
981 so->so_pollev |= SO_POLLEV_IN;
982 mutex_exit(&so->so_lock);
983 }
984 }
985 *phpp = &so->so_poll_list;
986 }
987 return (0);
988 }
989
990 /*
991 * Generic Upcalls
992 */
993 void
so_connected(sock_upper_handle_t sock_handle,sock_connid_t id,cred_t * peer_cred,pid_t peer_cpid)994 so_connected(sock_upper_handle_t sock_handle, sock_connid_t id,
995 cred_t *peer_cred, pid_t peer_cpid)
996 {
997 struct sonode *so = (struct sonode *)sock_handle;
998
999 mutex_enter(&so->so_lock);
1000 ASSERT(so->so_proto_handle != NULL);
1001
1002 if (peer_cred != NULL) {
1003 if (so->so_peercred != NULL)
1004 crfree(so->so_peercred);
1005 crhold(peer_cred);
1006 so->so_peercred = peer_cred;
1007 so->so_cpid = peer_cpid;
1008 }
1009
1010 so->so_proto_connid = id;
1011 soisconnected(so);
1012 /*
1013 * Wake ones who're waiting for conn to become established.
1014 */
1015 so_notify_connected(so);
1016 }
1017
1018 int
so_disconnected(sock_upper_handle_t sock_handle,sock_connid_t id,int error)1019 so_disconnected(sock_upper_handle_t sock_handle, sock_connid_t id, int error)
1020 {
1021 struct sonode *so = (struct sonode *)sock_handle;
1022 boolean_t connect_failed;
1023
1024 mutex_enter(&so->so_lock);
1025 connect_failed = so->so_state & SS_ISCONNECTED;
1026 so->so_proto_connid = id;
1027 soisdisconnected(so, error);
1028 so_notify_disconnected(so, connect_failed, error);
1029
1030 return (0);
1031 }
1032
1033 void
so_opctl(sock_upper_handle_t sock_handle,sock_opctl_action_t action,uintptr_t arg)1034 so_opctl(sock_upper_handle_t sock_handle, sock_opctl_action_t action,
1035 uintptr_t arg)
1036 {
1037 struct sonode *so = (struct sonode *)sock_handle;
1038
1039 switch (action) {
1040 case SOCK_OPCTL_SHUT_SEND:
1041 mutex_enter(&so->so_lock);
1042 socantsendmore(so);
1043 so_notify_disconnecting(so);
1044 break;
1045 case SOCK_OPCTL_SHUT_RECV: {
1046 mutex_enter(&so->so_lock);
1047 socantrcvmore(so);
1048 so_notify_eof(so);
1049 break;
1050 }
1051 case SOCK_OPCTL_ENAB_ACCEPT:
1052 mutex_enter(&so->so_lock);
1053 so->so_state |= SS_ACCEPTCONN;
1054 so->so_backlog = (unsigned int)arg;
1055 /*
1056 * The protocol can stop generating newconn upcalls when
1057 * the backlog is full, so to make sure the listener does
1058 * not end up with a queue full of deferred connections
1059 * we reduce the backlog by one. Thus the listener will
1060 * start closing deferred connections before the backlog
1061 * is full.
1062 */
1063 if (so->so_filter_active > 0)
1064 so->so_backlog = MAX(1, so->so_backlog - 1);
1065 mutex_exit(&so->so_lock);
1066 break;
1067 default:
1068 ASSERT(0);
1069 break;
1070 }
1071 }
1072
1073 void
so_txq_full(sock_upper_handle_t sock_handle,boolean_t qfull)1074 so_txq_full(sock_upper_handle_t sock_handle, boolean_t qfull)
1075 {
1076 struct sonode *so = (struct sonode *)sock_handle;
1077
1078 if (qfull) {
1079 so_snd_qfull(so);
1080 } else {
1081 so_snd_qnotfull(so);
1082 mutex_enter(&so->so_lock);
1083 /* so_notify_writable drops so_lock */
1084 so_notify_writable(so);
1085 }
1086 }
1087
1088 sock_upper_handle_t
so_newconn(sock_upper_handle_t parenthandle,sock_lower_handle_t proto_handle,sock_downcalls_t * sock_downcalls,struct cred * peer_cred,pid_t peer_cpid,sock_upcalls_t ** sock_upcallsp)1089 so_newconn(sock_upper_handle_t parenthandle,
1090 sock_lower_handle_t proto_handle, sock_downcalls_t *sock_downcalls,
1091 struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **sock_upcallsp)
1092 {
1093 struct sonode *so = (struct sonode *)parenthandle;
1094 struct sonode *nso;
1095 int error;
1096
1097 ASSERT(proto_handle != NULL);
1098
1099 if ((so->so_state & SS_ACCEPTCONN) == 0 ||
1100 (so->so_acceptq_len >= so->so_backlog &&
1101 (so->so_filter_active == 0 || !sof_sonode_drop_deferred(so)))) {
1102 return (NULL);
1103 }
1104
1105 nso = socket_newconn(so, proto_handle, sock_downcalls, SOCKET_NOSLEEP,
1106 &error);
1107 if (nso == NULL)
1108 return (NULL);
1109
1110 if (peer_cred != NULL) {
1111 crhold(peer_cred);
1112 nso->so_peercred = peer_cred;
1113 nso->so_cpid = peer_cpid;
1114 }
1115 nso->so_listener = so;
1116
1117 /*
1118 * The new socket (nso), proto_handle and sock_upcallsp are all
1119 * valid at this point. But as soon as nso is placed in the accept
1120 * queue that can no longer be assumed (since an accept() thread may
1121 * pull it off the queue and close the socket).
1122 */
1123 *sock_upcallsp = &so_upcalls;
1124
1125 mutex_enter(&so->so_acceptq_lock);
1126 if (so->so_state & (SS_CLOSING|SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) {
1127 mutex_exit(&so->so_acceptq_lock);
1128 ASSERT(nso->so_count == 1);
1129 nso->so_count--;
1130 nso->so_listener = NULL;
1131 /* drop proto ref */
1132 VN_RELE(SOTOV(nso));
1133 socket_destroy(nso);
1134 return (NULL);
1135 } else {
1136 so->so_acceptq_len++;
1137 if (nso->so_state & SS_FIL_DEFER) {
1138 list_insert_tail(&so->so_acceptq_defer, nso);
1139 mutex_exit(&so->so_acceptq_lock);
1140 } else {
1141 list_insert_tail(&so->so_acceptq_list, nso);
1142 cv_signal(&so->so_acceptq_cv);
1143 mutex_exit(&so->so_acceptq_lock);
1144 mutex_enter(&so->so_lock);
1145 so_notify_newconn(so);
1146 }
1147
1148 return ((sock_upper_handle_t)nso);
1149 }
1150 }
1151
1152 void
so_set_prop(sock_upper_handle_t sock_handle,struct sock_proto_props * soppp)1153 so_set_prop(sock_upper_handle_t sock_handle, struct sock_proto_props *soppp)
1154 {
1155 struct sonode *so;
1156
1157 so = (struct sonode *)sock_handle;
1158
1159 mutex_enter(&so->so_lock);
1160
1161 if (soppp->sopp_flags & SOCKOPT_MAXBLK)
1162 so->so_proto_props.sopp_maxblk = soppp->sopp_maxblk;
1163 if (soppp->sopp_flags & SOCKOPT_WROFF)
1164 so->so_proto_props.sopp_wroff = soppp->sopp_wroff;
1165 if (soppp->sopp_flags & SOCKOPT_TAIL)
1166 so->so_proto_props.sopp_tail = soppp->sopp_tail;
1167 if (soppp->sopp_flags & SOCKOPT_RCVHIWAT)
1168 so->so_proto_props.sopp_rxhiwat = soppp->sopp_rxhiwat;
1169 if (soppp->sopp_flags & SOCKOPT_RCVLOWAT)
1170 so->so_proto_props.sopp_rxlowat = soppp->sopp_rxlowat;
1171 if (soppp->sopp_flags & SOCKOPT_MAXPSZ)
1172 so->so_proto_props.sopp_maxpsz = soppp->sopp_maxpsz;
1173 if (soppp->sopp_flags & SOCKOPT_MINPSZ)
1174 so->so_proto_props.sopp_minpsz = soppp->sopp_minpsz;
1175 if (soppp->sopp_flags & SOCKOPT_ZCOPY) {
1176 if (soppp->sopp_zcopyflag & ZCVMSAFE) {
1177 so->so_proto_props.sopp_zcopyflag |= STZCVMSAFE;
1178 so->so_proto_props.sopp_zcopyflag &= ~STZCVMUNSAFE;
1179 } else if (soppp->sopp_zcopyflag & ZCVMUNSAFE) {
1180 so->so_proto_props.sopp_zcopyflag |= STZCVMUNSAFE;
1181 so->so_proto_props.sopp_zcopyflag &= ~STZCVMSAFE;
1182 }
1183
1184 if (soppp->sopp_zcopyflag & COPYCACHED) {
1185 so->so_proto_props.sopp_zcopyflag |= STRCOPYCACHED;
1186 }
1187 }
1188 if (soppp->sopp_flags & SOCKOPT_OOBINLINE)
1189 so->so_proto_props.sopp_oobinline = soppp->sopp_oobinline;
1190 if (soppp->sopp_flags & SOCKOPT_RCVTIMER)
1191 so->so_proto_props.sopp_rcvtimer = soppp->sopp_rcvtimer;
1192 if (soppp->sopp_flags & SOCKOPT_RCVTHRESH)
1193 so->so_proto_props.sopp_rcvthresh = soppp->sopp_rcvthresh;
1194 if (soppp->sopp_flags & SOCKOPT_MAXADDRLEN)
1195 so->so_proto_props.sopp_maxaddrlen = soppp->sopp_maxaddrlen;
1196 if (soppp->sopp_flags & SOCKOPT_LOOPBACK)
1197 so->so_proto_props.sopp_loopback = soppp->sopp_loopback;
1198
1199 mutex_exit(&so->so_lock);
1200
1201 if (so->so_filter_active > 0) {
1202 sof_instance_t *inst;
1203 ssize_t maxblk;
1204 ushort_t wroff, tail;
1205 maxblk = so->so_proto_props.sopp_maxblk;
1206 wroff = so->so_proto_props.sopp_wroff;
1207 tail = so->so_proto_props.sopp_tail;
1208 for (inst = so->so_filter_bottom; inst != NULL;
1209 inst = inst->sofi_prev) {
1210 if (SOF_INTERESTED(inst, mblk_prop)) {
1211 (*inst->sofi_ops->sofop_mblk_prop)(
1212 (sof_handle_t)inst, inst->sofi_cookie,
1213 &maxblk, &wroff, &tail);
1214 }
1215 }
1216 mutex_enter(&so->so_lock);
1217 so->so_proto_props.sopp_maxblk = maxblk;
1218 so->so_proto_props.sopp_wroff = wroff;
1219 so->so_proto_props.sopp_tail = tail;
1220 mutex_exit(&so->so_lock);
1221 }
1222 #ifdef DEBUG
1223 soppp->sopp_flags &= ~(SOCKOPT_MAXBLK | SOCKOPT_WROFF | SOCKOPT_TAIL |
1224 SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | SOCKOPT_MAXPSZ |
1225 SOCKOPT_ZCOPY | SOCKOPT_OOBINLINE | SOCKOPT_RCVTIMER |
1226 SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ |
1227 SOCKOPT_LOOPBACK);
1228 ASSERT(soppp->sopp_flags == 0);
1229 #endif
1230 }
1231
/*
 * Deliver a message received by the protocol to the socket.
 *
 * so		socket the message is for
 * mp		the message (M_DATA or M_PROTO), or NULL when the caller
 *		only wants a data notification / receive-space check
 * msg_size	number of data bytes in mp (must equal msgdsize(mp))
 * flags	MSG_OOB marks the message as out-of-band data
 * errorp	must be non-NULL; set to 0 on entry, to ENOSPC when no
 *		receive buffer space is left, and to EOPNOTSUPP when the
 *		socket is draining for, or has completed, fallback
 * force_pushp	optional; on input says whether the data must be pushed to
 *		the reader right away (B_TRUE is assumed when NULL), on
 *		output set to B_TRUE if a data notification was sent
 * filter	first socket filter whose data_in callback should see the
 *		message; filters are walked via sofi_prev from there
 *
 * Returns the space left in the receive buffer (so_rcvbuf minus
 * so_rcv_queued), or -1 when that is not positive (so_flowctrld is set
 * and *errorp is ENOSPC) or when the message was refused because of
 * fallback.  Returns 0 when the message was dropped because the socket
 * can not receive any more or is closing, and when mp is NULL and the
 * protocol provides sd_recv_uio.
 *
 * The message is consumed (queued or freed) in all cases except the
 * fallback refusal, where mp is left untouched.
 */
/* ARGSUSED */
ssize_t
so_queue_msg_impl(struct sonode *so, mblk_t *mp,
    size_t msg_size, int flags, int *errorp, boolean_t *force_pushp,
    sof_instance_t *filter)
{
	boolean_t force_push = B_TRUE;
	int space_left;
	sodirect_t *sodp = so->so_direct;

	ASSERT(errorp != NULL);
	*errorp = 0;
	if (mp == NULL) {
		/*
		 * No message: the protocol keeps the data itself when it
		 * has sd_recv_uio, so just tell the reader about it.
		 */
		if (so->so_downcalls->sd_recv_uio != NULL) {
			mutex_enter(&so->so_lock);
			/* the notify functions will drop the lock */
			if (flags & MSG_OOB)
				so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
			else
				so_notify_data(so, msg_size);
			return (0);
		}
		ASSERT(msg_size == 0);
		/* space_check expects so_lock to be held */
		mutex_enter(&so->so_lock);
		goto space_check;
	}

	ASSERT(mp->b_next == NULL);
	ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_PROTO);
	ASSERT(msg_size == msgdsize(mp));

	if (DB_TYPE(mp) == M_PROTO && !__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
		/* The read pointer is not aligned correctly for TPI */
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: Unaligned TPI message received. rptr = %p\n",
		    (void *)mp->b_rptr);
		freemsg(mp);
		mutex_enter(&so->so_lock);
		if (sodp != NULL)
			SOD_UIOAFINI(sodp);
		goto space_check;
	}

	if (so->so_filter_active > 0) {
		/*
		 * Run the message through every interested filter.  Each
		 * filter may replace mp and update msg_size.
		 */
		for (; filter != NULL; filter = filter->sofi_prev) {
			if (!SOF_INTERESTED(filter, data_in))
				continue;
			mp = (*filter->sofi_ops->sofop_data_in)(
			    (sof_handle_t)filter, filter->sofi_cookie, mp,
			    flags, &msg_size);
			ASSERT(msgdsize(mp) == msg_size);
			DTRACE_PROBE2(filter__data, (sof_instance_t), filter,
			    (mblk_t *), mp);
			/* Data was consumed/dropped, just do space check */
			if (msg_size == 0) {
				mutex_enter(&so->so_lock);
				goto space_check;
			}
		}
	}

	if (flags & MSG_OOB) {
		/*
		 * so_queue_oob takes so_lock itself and returns with it
		 * released, so it is re-acquired for the space check.
		 */
		so_queue_oob(so, mp, msg_size);
		mutex_enter(&so->so_lock);
		goto space_check;
	}

	if (force_pushp != NULL)
		force_push = *force_pushp;

	mutex_enter(&so->so_lock);
	if (so->so_state & (SS_FALLBACK_DRAIN | SS_FALLBACK_COMP)) {
		/* Refuse the message; note that mp is not freed here. */
		if (sodp != NULL)
			SOD_DISABLE(sodp);
		mutex_exit(&so->so_lock);
		*errorp = EOPNOTSUPP;
		return (-1);
	}
	if (so->so_state & (SS_CANTRCVMORE | SS_CLOSING)) {
		/* Nobody will read this data; silently drop it. */
		freemsg(mp);
		if (sodp != NULL)
			SOD_DISABLE(sodp);
		mutex_exit(&so->so_lock);
		return (0);
	}

	/* process the mblk via I/OAT if capable */
	if (sodp != NULL && sodp->sod_enabled) {
		if (DB_TYPE(mp) == M_DATA) {
			sod_uioa_mblk_init(sodp, mp, msg_size);
		} else {
			SOD_UIOAFINI(sodp);
		}
	}

	if (mp->b_next == NULL) {
		so_enqueue_msg(so, mp, msg_size);
	} else {
		/*
		 * A b_next chain (only possible if a filter returned one,
		 * given the assertion above): unlink and queue each message
		 * separately with its own size.
		 */
		do {
			mblk_t *nmp;

			if ((nmp = mp->b_next) != NULL) {
				mp->b_next = NULL;
			}
			so_enqueue_msg(so, mp, msgdsize(mp));
			mp = nmp;
		} while (mp != NULL);
	}

	space_left = so->so_rcvbuf - so->so_rcv_queued;
	if (space_left <= 0) {
		so->so_flowctrld = B_TRUE;
		*errorp = ENOSPC;
		space_left = -1;
	}

	/*
	 * Notify the reader now if asked to, or if enough data is queued to
	 * reach the receive threshold or the amount a reader wants.
	 * Otherwise leave it to the receive push timer.
	 */
	if (force_push || so->so_rcv_queued >= so->so_rcv_thresh ||
	    so->so_rcv_queued >= so->so_rcv_wanted) {
		SOCKET_TIMER_CANCEL(so);
		/*
		 * so_notify_data will release the lock
		 */
		so_notify_data(so, so->so_rcv_queued);

		if (force_pushp != NULL)
			*force_pushp = B_TRUE;
		goto done;
	} else if (so->so_rcv_timer_tid == 0) {
		/* Make sure the recv push timer is running */
		SOCKET_TIMER_START(so);
	}

	/* Exit path for callers that still hold so_lock. */
done_unlock:
	mutex_exit(&so->so_lock);
	/* Exit path for callers that no longer hold so_lock. */
done:
	return (space_left);

	/*
	 * Entered with so_lock held when nothing was queued by this
	 * function; only computes the remaining space and flow control.
	 */
space_check:
	space_left = so->so_rcvbuf - so->so_rcv_queued;
	if (space_left <= 0) {
		so->so_flowctrld = B_TRUE;
		*errorp = ENOSPC;
		space_left = -1;
	}
	goto done_unlock;
}
1378
1379 #pragma inline(so_queue_msg_impl)
1380
1381 ssize_t
so_queue_msg(sock_upper_handle_t sock_handle,mblk_t * mp,size_t msg_size,int flags,int * errorp,boolean_t * force_pushp)1382 so_queue_msg(sock_upper_handle_t sock_handle, mblk_t *mp,
1383 size_t msg_size, int flags, int *errorp, boolean_t *force_pushp)
1384 {
1385 struct sonode *so = (struct sonode *)sock_handle;
1386
1387 return (so_queue_msg_impl(so, mp, msg_size, flags, errorp, force_pushp,
1388 so->so_filter_bottom));
1389 }
1390
1391 /*
1392 * Set the offset of where the oob data is relative to the bytes in
1393 * queued. Also generate SIGURG
1394 */
1395 void
so_signal_oob(sock_upper_handle_t sock_handle,ssize_t offset)1396 so_signal_oob(sock_upper_handle_t sock_handle, ssize_t offset)
1397 {
1398 struct sonode *so;
1399
1400 ASSERT(offset >= 0);
1401 so = (struct sonode *)sock_handle;
1402 mutex_enter(&so->so_lock);
1403 if (so->so_direct != NULL)
1404 SOD_UIOAFINI(so->so_direct);
1405
1406 /*
1407 * New urgent data on the way so forget about any old
1408 * urgent data.
1409 */
1410 so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);
1411
1412 /*
1413 * Record that urgent data is pending.
1414 */
1415 so->so_state |= SS_OOBPEND;
1416
1417 if (so->so_oobmsg != NULL) {
1418 dprintso(so, 1, ("sock: discarding old oob\n"));
1419 freemsg(so->so_oobmsg);
1420 so->so_oobmsg = NULL;
1421 }
1422
1423 /*
1424 * set the offset where the urgent byte is
1425 */
1426 so->so_oobmark = so->so_rcv_queued + offset;
1427 if (so->so_oobmark == 0)
1428 so->so_state |= SS_RCVATMARK;
1429 else
1430 so->so_state &= ~SS_RCVATMARK;
1431
1432 so_notify_oobsig(so);
1433 }
1434
1435 /*
1436 * Queue the OOB byte
1437 */
1438 static void
so_queue_oob(struct sonode * so,mblk_t * mp,size_t len)1439 so_queue_oob(struct sonode *so, mblk_t *mp, size_t len)
1440 {
1441 mutex_enter(&so->so_lock);
1442 if (so->so_direct != NULL)
1443 SOD_UIOAFINI(so->so_direct);
1444
1445 ASSERT(mp != NULL);
1446 if (!IS_SO_OOB_INLINE(so)) {
1447 so->so_oobmsg = mp;
1448 so->so_state |= SS_HAVEOOBDATA;
1449 } else {
1450 so_enqueue_msg(so, mp, len);
1451 }
1452
1453 so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
1454 }
1455
1456 int
so_close(struct sonode * so,int flag,struct cred * cr)1457 so_close(struct sonode *so, int flag, struct cred *cr)
1458 {
1459 int error;
1460
1461 /*
1462 * No new data will be enqueued once the CLOSING flag is set.
1463 */
1464 mutex_enter(&so->so_lock);
1465 so->so_state |= SS_CLOSING;
1466 ASSERT(so_verify_oobstate(so));
1467 so_rcv_flush(so);
1468 mutex_exit(&so->so_lock);
1469
1470 if (so->so_filter_active > 0)
1471 sof_sonode_closing(so);
1472
1473 if (so->so_state & SS_ACCEPTCONN) {
1474 /*
1475 * We grab and release the accept lock to ensure that any
1476 * thread about to insert a socket in so_newconn completes
1477 * before we flush the queue. Any thread calling so_newconn
1478 * after we drop the lock will observe the SS_CLOSING flag,
1479 * which will stop it from inserting the socket in the queue.
1480 */
1481 mutex_enter(&so->so_acceptq_lock);
1482 mutex_exit(&so->so_acceptq_lock);
1483
1484 so_acceptq_flush(so, B_TRUE);
1485 }
1486
1487 error = (*so->so_downcalls->sd_close)(so->so_proto_handle, flag, cr);
1488 switch (error) {
1489 default:
1490 /* Protocol made a synchronous close; remove proto ref */
1491 VN_RELE(SOTOV(so));
1492 break;
1493 case EINPROGRESS:
1494 /*
1495 * Protocol is in the process of closing, it will make a
1496 * 'closed' upcall to remove the reference.
1497 */
1498 error = 0;
1499 break;
1500 }
1501
1502 return (error);
1503 }
1504
1505 /*
1506 * Upcall made by the protocol when it's doing an asynchronous close. It
1507 * will drop the protocol's reference on the socket.
1508 */
1509 void
so_closed(sock_upper_handle_t sock_handle)1510 so_closed(sock_upper_handle_t sock_handle)
1511 {
1512 struct sonode *so = (struct sonode *)sock_handle;
1513
1514 VN_RELE(SOTOV(so));
1515 }
1516
1517 void
so_zcopy_notify(sock_upper_handle_t sock_handle)1518 so_zcopy_notify(sock_upper_handle_t sock_handle)
1519 {
1520 struct sonode *so = (struct sonode *)sock_handle;
1521
1522 mutex_enter(&so->so_lock);
1523 so->so_copyflag |= STZCNOTIFY;
1524 cv_broadcast(&so->so_copy_cv);
1525 mutex_exit(&so->so_lock);
1526 }
1527
1528 void
so_set_error(sock_upper_handle_t sock_handle,int error)1529 so_set_error(sock_upper_handle_t sock_handle, int error)
1530 {
1531 struct sonode *so = (struct sonode *)sock_handle;
1532
1533 mutex_enter(&so->so_lock);
1534
1535 soseterror(so, error);
1536
1537 so_notify_error(so);
1538 }
1539
1540 /*
1541 * so_recvmsg - read data from the socket
1542 *
1543 * There are two ways of obtaining data; either we ask the protocol to
1544 * copy directly into the supplied buffer, or we copy data from the
1545 * sonode's receive queue. The decision which one to use depends on
1546 * whether the protocol has a sd_recv_uio down call.
1547 */
1548 int
so_recvmsg(struct sonode * so,struct nmsghdr * msg,struct uio * uiop,struct cred * cr)1549 so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
1550 struct cred *cr)
1551 {
1552 rval_t rval;
1553 int flags = 0;
1554 t_uscalar_t controllen, namelen;
1555 int error = 0;
1556 int ret;
1557 mblk_t *mctlp = NULL;
1558 union T_primitives *tpr;
1559 void *control;
1560 ssize_t saved_resid;
1561 struct uio *suiop;
1562
1563 SO_BLOCK_FALLBACK(so, SOP_RECVMSG(so, msg, uiop, cr));
1564
1565 if ((so->so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 &&
1566 (so->so_mode & SM_CONNREQUIRED)) {
1567 SO_UNBLOCK_FALLBACK(so);
1568 return (ENOTCONN);
1569 }
1570
1571 if (msg->msg_flags & MSG_PEEK)
1572 msg->msg_flags &= ~MSG_WAITALL;
1573
1574 if (so->so_mode & SM_ATOMIC)
1575 msg->msg_flags |= MSG_TRUNC;
1576
1577 if (msg->msg_flags & MSG_OOB) {
1578 if ((so->so_mode & SM_EXDATA) == 0) {
1579 error = EOPNOTSUPP;
1580 } else if (so->so_downcalls->sd_recv_uio != NULL) {
1581 error = (*so->so_downcalls->sd_recv_uio)
1582 (so->so_proto_handle, uiop, msg, cr);
1583 } else {
1584 error = sorecvoob(so, msg, uiop, msg->msg_flags,
1585 IS_SO_OOB_INLINE(so));
1586 }
1587 SO_UNBLOCK_FALLBACK(so);
1588 return (error);
1589 }
1590
1591 /*
1592 * If the protocol has the recv down call, then pass the request
1593 * down.
1594 */
1595 if (so->so_downcalls->sd_recv_uio != NULL) {
1596 error = (*so->so_downcalls->sd_recv_uio)
1597 (so->so_proto_handle, uiop, msg, cr);
1598 SO_UNBLOCK_FALLBACK(so);
1599 return (error);
1600 }
1601
1602 /*
1603 * Reading data from the socket buffer
1604 */
1605 flags = msg->msg_flags;
1606 msg->msg_flags = 0;
1607
1608 /*
1609 * Set msg_controllen and msg_namelen to zero here to make it
1610 * simpler in the cases that no control or name is returned.
1611 */
1612 controllen = msg->msg_controllen;
1613 namelen = msg->msg_namelen;
1614 msg->msg_controllen = 0;
1615 msg->msg_namelen = 0;
1616
1617 mutex_enter(&so->so_lock);
1618 /* Set SOREADLOCKED */
1619 error = so_lock_read_intr(so,
1620 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0));
1621 mutex_exit(&so->so_lock);
1622 if (error) {
1623 SO_UNBLOCK_FALLBACK(so);
1624 return (error);
1625 }
1626
1627 suiop = sod_rcv_init(so, flags, &uiop);
1628 retry:
1629 saved_resid = uiop->uio_resid;
1630 error = so_dequeue_msg(so, &mctlp, uiop, &rval, flags);
1631 if (error != 0) {
1632 goto out;
1633 }
1634 /*
1635 * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
1636 * For non-datagrams MOREDATA is used to set MSG_EOR.
1637 */
1638 ASSERT(!(rval.r_val1 & MORECTL));
1639 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC))
1640 msg->msg_flags |= MSG_TRUNC;
1641 if (mctlp == NULL) {
1642 dprintso(so, 1, ("so_recvmsg: got M_DATA\n"));
1643
1644 mutex_enter(&so->so_lock);
1645 /* Set MSG_EOR based on MOREDATA */
1646 if (!(rval.r_val1 & MOREDATA)) {
1647 if (so->so_state & SS_SAVEDEOR) {
1648 msg->msg_flags |= MSG_EOR;
1649 so->so_state &= ~SS_SAVEDEOR;
1650 }
1651 }
1652 /*
1653 * If some data was received (i.e. not EOF) and the
1654 * read/recv* has not been satisfied wait for some more.
1655 */
1656 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
1657 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
1658 mutex_exit(&so->so_lock);
1659 flags |= MSG_NOMARK;
1660 goto retry;
1661 }
1662
1663 goto out_locked;
1664 }
1665 /* so_queue_msg has already verified length and alignment */
1666 tpr = (union T_primitives *)mctlp->b_rptr;
1667 dprintso(so, 1, ("so_recvmsg: type %d\n", tpr->type));
1668 switch (tpr->type) {
1669 case T_DATA_IND: {
1670 /*
1671 * Set msg_flags to MSG_EOR based on
1672 * MORE_flag and MOREDATA.
1673 */
1674 mutex_enter(&so->so_lock);
1675 so->so_state &= ~SS_SAVEDEOR;
1676 if (!(tpr->data_ind.MORE_flag & 1)) {
1677 if (!(rval.r_val1 & MOREDATA))
1678 msg->msg_flags |= MSG_EOR;
1679 else
1680 so->so_state |= SS_SAVEDEOR;
1681 }
1682 freemsg(mctlp);
1683 /*
1684 * If some data was received (i.e. not EOF) and the
1685 * read/recv* has not been satisfied wait for some more.
1686 */
1687 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
1688 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
1689 mutex_exit(&so->so_lock);
1690 flags |= MSG_NOMARK;
1691 goto retry;
1692 }
1693 goto out_locked;
1694 }
1695 case T_UNITDATA_IND: {
1696 void *addr;
1697 t_uscalar_t addrlen;
1698 void *abuf;
1699 t_uscalar_t optlen;
1700 void *opt;
1701
1702 if (namelen != 0) {
1703 /* Caller wants source address */
1704 addrlen = tpr->unitdata_ind.SRC_length;
1705 addr = sogetoff(mctlp, tpr->unitdata_ind.SRC_offset,
1706 addrlen, 1);
1707 if (addr == NULL) {
1708 freemsg(mctlp);
1709 error = EPROTO;
1710 eprintsoline(so, error);
1711 goto out;
1712 }
1713 ASSERT(so->so_family != AF_UNIX);
1714 }
1715 optlen = tpr->unitdata_ind.OPT_length;
1716 if (optlen != 0) {
1717 t_uscalar_t ncontrollen;
1718
1719 /*
1720 * Extract any source address option.
1721 * Determine how large cmsg buffer is needed.
1722 */
1723 opt = sogetoff(mctlp, tpr->unitdata_ind.OPT_offset,
1724 optlen, __TPI_ALIGN_SIZE);
1725
1726 if (opt == NULL) {
1727 freemsg(mctlp);
1728 error = EPROTO;
1729 eprintsoline(so, error);
1730 goto out;
1731 }
1732 if (so->so_family == AF_UNIX)
1733 so_getopt_srcaddr(opt, optlen, &addr, &addrlen);
1734 ncontrollen = so_cmsglen(mctlp, opt, optlen,
1735 !(flags & MSG_XPG4_2));
1736 if (controllen != 0)
1737 controllen = ncontrollen;
1738 else if (ncontrollen != 0)
1739 msg->msg_flags |= MSG_CTRUNC;
1740 } else {
1741 controllen = 0;
1742 }
1743
1744 if (namelen != 0) {
1745 /*
1746 * Return address to caller.
1747 * Caller handles truncation if length
1748 * exceeds msg_namelen.
1749 * NOTE: AF_UNIX NUL termination is ensured by
1750 * the sender's copyin_name().
1751 */
1752 abuf = kmem_alloc(addrlen, KM_SLEEP);
1753
1754 bcopy(addr, abuf, addrlen);
1755 msg->msg_name = abuf;
1756 msg->msg_namelen = addrlen;
1757 }
1758
1759 if (controllen != 0) {
1760 /*
1761 * Return control msg to caller.
1762 * Caller handles truncation if length
1763 * exceeds msg_controllen.
1764 */
1765 control = kmem_zalloc(controllen, KM_SLEEP);
1766
1767 error = so_opt2cmsg(mctlp, opt, optlen,
1768 !(flags & MSG_XPG4_2), control, controllen);
1769 if (error) {
1770 freemsg(mctlp);
1771 if (msg->msg_namelen != 0)
1772 kmem_free(msg->msg_name,
1773 msg->msg_namelen);
1774 kmem_free(control, controllen);
1775 eprintsoline(so, error);
1776 goto out;
1777 }
1778 msg->msg_control = control;
1779 msg->msg_controllen = controllen;
1780 }
1781
1782 freemsg(mctlp);
1783 goto out;
1784 }
1785 case T_OPTDATA_IND: {
1786 struct T_optdata_req *tdr;
1787 void *opt;
1788 t_uscalar_t optlen;
1789
1790 tdr = (struct T_optdata_req *)mctlp->b_rptr;
1791 optlen = tdr->OPT_length;
1792 if (optlen != 0) {
1793 t_uscalar_t ncontrollen;
1794 /*
1795 * Determine how large cmsg buffer is needed.
1796 */
1797 opt = sogetoff(mctlp,
1798 tpr->optdata_ind.OPT_offset, optlen,
1799 __TPI_ALIGN_SIZE);
1800
1801 if (opt == NULL) {
1802 freemsg(mctlp);
1803 error = EPROTO;
1804 eprintsoline(so, error);
1805 goto out;
1806 }
1807
1808 ncontrollen = so_cmsglen(mctlp, opt, optlen,
1809 !(flags & MSG_XPG4_2));
1810 if (controllen != 0)
1811 controllen = ncontrollen;
1812 else if (ncontrollen != 0)
1813 msg->msg_flags |= MSG_CTRUNC;
1814 } else {
1815 controllen = 0;
1816 }
1817
1818 if (controllen != 0) {
1819 /*
1820 * Return control msg to caller.
1821 * Caller handles truncation if length
1822 * exceeds msg_controllen.
1823 */
1824 control = kmem_zalloc(controllen, KM_SLEEP);
1825
1826 error = so_opt2cmsg(mctlp, opt, optlen,
1827 !(flags & MSG_XPG4_2), control, controllen);
1828 if (error) {
1829 freemsg(mctlp);
1830 kmem_free(control, controllen);
1831 eprintsoline(so, error);
1832 goto out;
1833 }
1834 msg->msg_control = control;
1835 msg->msg_controllen = controllen;
1836 }
1837
1838 /*
1839 * Set msg_flags to MSG_EOR based on
1840 * DATA_flag and MOREDATA.
1841 */
1842 mutex_enter(&so->so_lock);
1843 so->so_state &= ~SS_SAVEDEOR;
1844 if (!(tpr->data_ind.MORE_flag & 1)) {
1845 if (!(rval.r_val1 & MOREDATA))
1846 msg->msg_flags |= MSG_EOR;
1847 else
1848 so->so_state |= SS_SAVEDEOR;
1849 }
1850 freemsg(mctlp);
1851 /*
1852 * If some data was received (i.e. not EOF) and the
1853 * read/recv* has not been satisfied wait for some more.
1854 * Not possible to wait if control info was received.
1855 */
1856 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
1857 controllen == 0 &&
1858 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
1859 mutex_exit(&so->so_lock);
1860 flags |= MSG_NOMARK;
1861 goto retry;
1862 }
1863 goto out_locked;
1864 }
1865 default:
1866 cmn_err(CE_CONT, "so_recvmsg bad type %x \n",
1867 tpr->type);
1868 freemsg(mctlp);
1869 error = EPROTO;
1870 ASSERT(0);
1871 }
1872 out:
1873 mutex_enter(&so->so_lock);
1874 out_locked:
1875 ret = sod_rcv_done(so, suiop, uiop);
1876 if (ret != 0 && error == 0)
1877 error = ret;
1878
1879 so_unlock_read(so); /* Clear SOREADLOCKED */
1880 mutex_exit(&so->so_lock);
1881
1882 SO_UNBLOCK_FALLBACK(so);
1883
1884 return (error);
1885 }
1886
/*
 * Socket operations vector made up of the so_* implementations.  The
 * initializer is positional, so the entries must stay in the order in
 * which sonodeops_t declares its members (named in the trailing comments).
 */
sonodeops_t so_sonodeops = {
	so_init,		/* sop_init		*/
	so_accept,		/* sop_accept		*/
	so_bind,		/* sop_bind		*/
	so_listen,		/* sop_listen		*/
	so_connect,		/* sop_connect		*/
	so_recvmsg,		/* sop_recvmsg		*/
	so_sendmsg,		/* sop_sendmsg		*/
	so_sendmblk,		/* sop_sendmblk		*/
	so_getpeername,		/* sop_getpeername	*/
	so_getsockname,		/* sop_getsockname	*/
	so_shutdown,		/* sop_shutdown		*/
	so_getsockopt,		/* sop_getsockopt	*/
	so_setsockopt,		/* sop_setsockopt	*/
	so_ioctl,		/* sop_ioctl		*/
	so_poll,		/* sop_poll		*/
	so_close,		/* sop_close		*/
};
1905
/*
 * Upcall vector through which a protocol calls back into the socket
 * layer; so_newconn() hands its address to the protocol for every new
 * connection.  The initializer is positional, so the entries must stay in
 * the order in which sock_upcalls_t declares its members.
 */
sock_upcalls_t so_upcalls = {
	so_newconn,
	so_connected,
	so_disconnected,
	so_opctl,
	so_queue_msg,
	so_set_prop,
	so_txq_full,
	so_signal_oob,
	so_zcopy_notify,
	so_set_error,
	so_closed
};
1919