xref: /netbsd-src/sys/kern/uipc_syscalls.c (revision 3816d47b2c42fcd6e549e3407f842a5b1a1d23ad)
1 /*	$NetBSD: uipc_syscalls.c,v 1.139 2009/12/29 04:23:43 elad Exp $	*/
2 
3 /*-
4  * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Andrew Doran.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 1982, 1986, 1989, 1990, 1993
34  *	The Regents of the University of California.  All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	@(#)uipc_syscalls.c	8.6 (Berkeley) 2/14/95
61  */
62 
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: uipc_syscalls.c,v 1.139 2009/12/29 04:23:43 elad Exp $");
65 
66 #include "opt_pipe.h"
67 
68 #include <sys/param.h>
69 #include <sys/systm.h>
70 #include <sys/filedesc.h>
71 #include <sys/proc.h>
72 #include <sys/file.h>
73 #include <sys/buf.h>
74 #include <sys/malloc.h>
75 #include <sys/mbuf.h>
76 #include <sys/protosw.h>
77 #include <sys/socket.h>
78 #include <sys/socketvar.h>
79 #include <sys/signalvar.h>
80 #include <sys/un.h>
81 #include <sys/ktrace.h>
82 #include <sys/event.h>
83 #include <sys/kauth.h>
84 
85 #include <sys/mount.h>
86 #include <sys/syscallargs.h>
87 
88 #include <uvm/uvm_extern.h>
89 
90 /*
91  * System call interface to the socket abstraction.
92  */
93 extern const struct fileops socketops;
94 
95 int
96 sys___socket30(struct lwp *l, const struct sys___socket30_args *uap, register_t *retval)
97 {
98 	/* {
99 		syscallarg(int)	domain;
100 		syscallarg(int)	type;
101 		syscallarg(int)	protocol;
102 	} */
103 	int		fd, error;
104 
105 	error = fsocreate(SCARG(uap, domain), NULL, SCARG(uap, type),
106 			 SCARG(uap, protocol), l, &fd);
107 	if (error == 0)
108 		*retval = fd;
109 	return error;
110 }
111 
112 /* ARGSUSED */
113 int
114 sys_bind(struct lwp *l, const struct sys_bind_args *uap, register_t *retval)
115 {
116 	/* {
117 		syscallarg(int)				s;
118 		syscallarg(const struct sockaddr *)	name;
119 		syscallarg(unsigned int)		namelen;
120 	} */
121 	struct mbuf	*nam;
122 	int		error;
123 
124 	error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
125 	    MT_SONAME);
126 	if (error)
127 		return error;
128 
129 	return do_sys_bind(l, SCARG(uap, s), nam);
130 }
131 
132 int
133 do_sys_bind(struct lwp *l, int fd, struct mbuf *nam)
134 {
135 	struct socket	*so;
136 	int		error;
137 
138 	if ((error = fd_getsock(fd, &so)) != 0) {
139 		m_freem(nam);
140 		return (error);
141 	}
142 	MCLAIM(nam, so->so_mowner);
143 	error = sobind(so, nam, l);
144 	m_freem(nam);
145 	fd_putfile(fd);
146 	return error;
147 }
148 
149 /* ARGSUSED */
150 int
151 sys_listen(struct lwp *l, const struct sys_listen_args *uap, register_t *retval)
152 {
153 	/* {
154 		syscallarg(int)	s;
155 		syscallarg(int)	backlog;
156 	} */
157 	struct socket	*so;
158 	int		error;
159 
160 	if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
161 		return (error);
162 	error = solisten(so, SCARG(uap, backlog), l);
163 	fd_putfile(SCARG(uap, s));
164 	return error;
165 }
166 
167 int
168 do_sys_accept(struct lwp *l, int sock, struct mbuf **name, register_t *new_sock)
169 {
170 	file_t		*fp, *fp2;
171 	struct mbuf	*nam;
172 	int		error, fd;
173 	struct socket	*so, *so2;
174 	short		wakeup_state = 0;
175 
176 	if ((fp = fd_getfile(sock)) == NULL)
177 		return (EBADF);
178 	if (fp->f_type != DTYPE_SOCKET) {
179 		fd_putfile(sock);
180 		return (ENOTSOCK);
181 	}
182 	if ((error = fd_allocfile(&fp2, &fd)) != 0) {
183 		fd_putfile(sock);
184 		return (error);
185 	}
186 	nam = m_get(M_WAIT, MT_SONAME);
187 	*new_sock = fd;
188 	so = fp->f_data;
189 	solock(so);
190 	if (!(so->so_proto->pr_flags & PR_LISTEN)) {
191 		error = EOPNOTSUPP;
192 		goto bad;
193 	}
194 	if ((so->so_options & SO_ACCEPTCONN) == 0) {
195 		error = EINVAL;
196 		goto bad;
197 	}
198 	if (so->so_nbio && so->so_qlen == 0) {
199 		error = EWOULDBLOCK;
200 		goto bad;
201 	}
202 	while (so->so_qlen == 0 && so->so_error == 0) {
203 		if (so->so_state & SS_CANTRCVMORE) {
204 			so->so_error = ECONNABORTED;
205 			break;
206 		}
207 		if (wakeup_state & SS_RESTARTSYS) {
208 			error = ERESTART;
209 			goto bad;
210 		}
211 		error = sowait(so, true, 0);
212 		if (error) {
213 			goto bad;
214 		}
215 		wakeup_state = so->so_state;
216 	}
217 	if (so->so_error) {
218 		error = so->so_error;
219 		so->so_error = 0;
220 		goto bad;
221 	}
222 	/* connection has been removed from the listen queue */
223 	KNOTE(&so->so_rcv.sb_sel.sel_klist, NOTE_SUBMIT);
224 	so2 = TAILQ_FIRST(&so->so_q);
225 	if (soqremque(so2, 1) == 0)
226 		panic("accept");
227 	fp2->f_type = DTYPE_SOCKET;
228 	fp2->f_flag = fp->f_flag;
229 	fp2->f_ops = &socketops;
230 	fp2->f_data = so2;
231 	error = soaccept(so2, nam);
232 	so2->so_cred = kauth_cred_dup(so->so_cred);
233 	sounlock(so);
234 	if (error) {
235 		/* an error occurred, free the file descriptor and mbuf */
236 		kauth_cred_free(so2->so_cred);
237 		m_freem(nam);
238 		mutex_enter(&fp2->f_lock);
239 		fp2->f_count++;
240 		mutex_exit(&fp2->f_lock);
241 		closef(fp2);
242 		fd_abort(curproc, NULL, fd);
243 	} else {
244 		fd_affix(curproc, fp2, fd);
245 		*name = nam;
246 	}
247 	fd_putfile(sock);
248 	return (error);
249  bad:
250  	sounlock(so);
251  	m_freem(nam);
252 	fd_putfile(sock);
253  	fd_abort(curproc, fp2, fd);
254  	return (error);
255 }
256 
257 int
258 sys_accept(struct lwp *l, const struct sys_accept_args *uap, register_t *retval)
259 {
260 	/* {
261 		syscallarg(int)			s;
262 		syscallarg(struct sockaddr *)	name;
263 		syscallarg(unsigned int *)	anamelen;
264 	} */
265 	int error, fd;
266 	struct mbuf *name;
267 
268 	error = do_sys_accept(l, SCARG(uap, s), &name, retval);
269 	if (error != 0)
270 		return error;
271 	error = copyout_sockname(SCARG(uap, name), SCARG(uap, anamelen),
272 	    MSG_LENUSRSPACE, name);
273 	if (name != NULL)
274 		m_free(name);
275 	if (error != 0) {
276 		fd = (int)*retval;
277 		if (fd_getfile(fd) != NULL)
278 			(void)fd_close(fd);
279 	}
280 	return error;
281 }
282 
283 /* ARGSUSED */
284 int
285 sys_connect(struct lwp *l, const struct sys_connect_args *uap, register_t *retval)
286 {
287 	/* {
288 		syscallarg(int)				s;
289 		syscallarg(const struct sockaddr *)	name;
290 		syscallarg(unsigned int)		namelen;
291 	} */
292 	int		error;
293 	struct mbuf	*nam;
294 
295 	error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
296 	    MT_SONAME);
297 	if (error)
298 		return error;
299 	return do_sys_connect(l,  SCARG(uap, s), nam);
300 }
301 
302 int
303 do_sys_connect(struct lwp *l, int fd, struct mbuf *nam)
304 {
305 	struct socket	*so;
306 	int		error;
307 	int		interrupted = 0;
308 
309 	if ((error = fd_getsock(fd, &so)) != 0) {
310 		m_freem(nam);
311 		return (error);
312 	}
313 	solock(so);
314 	MCLAIM(nam, so->so_mowner);
315 	if ((so->so_state & SS_ISCONNECTING) != 0) {
316 		error = EALREADY;
317 		goto out;
318 	}
319 
320 	error = soconnect(so, nam, l);
321 	if (error)
322 		goto bad;
323 	if (so->so_nbio && (so->so_state & SS_ISCONNECTING) != 0) {
324 		error = EINPROGRESS;
325 		goto out;
326 	}
327 	while ((so->so_state & SS_ISCONNECTING) != 0 && so->so_error == 0) {
328 		error = sowait(so, true, 0);
329 		if (__predict_false((so->so_state & SS_ISABORTING) != 0)) {
330 			error = EPIPE;
331 			interrupted = 1;
332 			break;
333 		}
334 		if (error) {
335 			if (error == EINTR || error == ERESTART)
336 				interrupted = 1;
337 			break;
338 		}
339 	}
340 	if (error == 0) {
341 		error = so->so_error;
342 		so->so_error = 0;
343 	}
344  bad:
345 	if (!interrupted)
346 		so->so_state &= ~SS_ISCONNECTING;
347 	if (error == ERESTART)
348 		error = EINTR;
349  out:
350  	sounlock(so);
351  	fd_putfile(fd);
352 	m_freem(nam);
353 	return (error);
354 }
355 
356 int
357 sys_socketpair(struct lwp *l, const struct sys_socketpair_args *uap, register_t *retval)
358 {
359 	/* {
360 		syscallarg(int)		domain;
361 		syscallarg(int)		type;
362 		syscallarg(int)		protocol;
363 		syscallarg(int *)	rsv;
364 	} */
365 	file_t		*fp1, *fp2;
366 	struct socket	*so1, *so2;
367 	int		fd, error, sv[2];
368 	proc_t		*p;
369 
370 	p = curproc;
371 	error = socreate(SCARG(uap, domain), &so1, SCARG(uap, type),
372 	    SCARG(uap, protocol), l, NULL);
373 	if (error)
374 		return (error);
375 	error = socreate(SCARG(uap, domain), &so2, SCARG(uap, type),
376 	    SCARG(uap, protocol), l, so1);
377 	if (error)
378 		goto free1;
379 	if ((error = fd_allocfile(&fp1, &fd)) != 0)
380 		goto free2;
381 	sv[0] = fd;
382 	fp1->f_flag = FREAD|FWRITE;
383 	fp1->f_type = DTYPE_SOCKET;
384 	fp1->f_ops = &socketops;
385 	fp1->f_data = so1;
386 	if ((error = fd_allocfile(&fp2, &fd)) != 0)
387 		goto free3;
388 	fp2->f_flag = FREAD|FWRITE;
389 	fp2->f_type = DTYPE_SOCKET;
390 	fp2->f_ops = &socketops;
391 	fp2->f_data = so2;
392 	sv[1] = fd;
393 	solock(so1);
394 	error = soconnect2(so1, so2);
395 	if (error == 0 && SCARG(uap, type) == SOCK_DGRAM) {
396 		/*
397 		 * Datagram socket connection is asymmetric.
398 		 */
399 		error = soconnect2(so2, so1);
400 	}
401 	sounlock(so1);
402 	if (error == 0)
403 		error = copyout(sv, SCARG(uap, rsv), 2 * sizeof(int));
404 	if (error == 0) {
405 		fd_affix(p, fp2, sv[1]);
406 		fd_affix(p, fp1, sv[0]);
407 		return (0);
408 	}
409 	fd_abort(p, fp2, sv[1]);
410  free3:
411 	fd_abort(p, fp1, sv[0]);
412  free2:
413 	(void)soclose(so2);
414  free1:
415 	(void)soclose(so1);
416 	return (error);
417 }
418 
419 int
420 sys_sendto(struct lwp *l, const struct sys_sendto_args *uap, register_t *retval)
421 {
422 	/* {
423 		syscallarg(int)				s;
424 		syscallarg(const void *)		buf;
425 		syscallarg(size_t)			len;
426 		syscallarg(int)				flags;
427 		syscallarg(const struct sockaddr *)	to;
428 		syscallarg(unsigned int)		tolen;
429 	} */
430 	struct msghdr	msg;
431 	struct iovec	aiov;
432 
433 	msg.msg_name = __UNCONST(SCARG(uap, to)); /* XXXUNCONST kills const */
434 	msg.msg_namelen = SCARG(uap, tolen);
435 	msg.msg_iov = &aiov;
436 	msg.msg_iovlen = 1;
437 	msg.msg_control = NULL;
438 	msg.msg_flags = 0;
439 	aiov.iov_base = __UNCONST(SCARG(uap, buf)); /* XXXUNCONST kills const */
440 	aiov.iov_len = SCARG(uap, len);
441 	return do_sys_sendmsg(l, SCARG(uap, s), &msg, SCARG(uap, flags), retval);
442 }
443 
444 int
445 sys_sendmsg(struct lwp *l, const struct sys_sendmsg_args *uap, register_t *retval)
446 {
447 	/* {
448 		syscallarg(int)				s;
449 		syscallarg(const struct msghdr *)	msg;
450 		syscallarg(int)				flags;
451 	} */
452 	struct msghdr	msg;
453 	int		error;
454 
455 	error = copyin(SCARG(uap, msg), &msg, sizeof(msg));
456 	if (error)
457 		return (error);
458 
459 	msg.msg_flags = MSG_IOVUSRSPACE;
460 	return do_sys_sendmsg(l, SCARG(uap, s), &msg, SCARG(uap, flags), retval);
461 }
462 
463 int
464 do_sys_sendmsg(struct lwp *l, int s, struct msghdr *mp, int flags,
465 		register_t *retsize)
466 {
467 	struct uio	auio;
468 	int		i, len, error, iovlen;
469 	struct mbuf	*to, *control;
470 	struct socket	*so;
471 	struct iovec	*tiov;
472 	struct iovec	aiov[UIO_SMALLIOV], *iov = aiov;
473 	struct iovec	*ktriov = NULL;
474 
475 	ktrkuser("msghdr", mp, sizeof *mp);
476 
477 	/* If the caller passed us stuff in mbufs, we must free them */
478 	if (mp->msg_flags & MSG_NAMEMBUF)
479 		to = mp->msg_name;
480 	else
481 		to = NULL;
482 
483 	if (mp->msg_flags & MSG_CONTROLMBUF)
484 		control = mp->msg_control;
485 	else
486 		control = NULL;
487 
488 	if (mp->msg_flags & MSG_IOVUSRSPACE) {
489 		if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) {
490 			if ((unsigned int)mp->msg_iovlen > IOV_MAX) {
491 				error = EMSGSIZE;
492 				goto bad;
493 			}
494 			iov = malloc(sizeof(struct iovec) * mp->msg_iovlen,
495 			    M_IOV, M_WAITOK);
496 		}
497 		if (mp->msg_iovlen != 0) {
498 			error = copyin(mp->msg_iov, iov,
499 			    (size_t)(mp->msg_iovlen * sizeof(struct iovec)));
500 			if (error)
501 				goto bad;
502 		}
503 		mp->msg_iov = iov;
504 	}
505 
506 	auio.uio_iov = mp->msg_iov;
507 	auio.uio_iovcnt = mp->msg_iovlen;
508 	auio.uio_rw = UIO_WRITE;
509 	auio.uio_offset = 0;			/* XXX */
510 	auio.uio_resid = 0;
511 	KASSERT(l == curlwp);
512 	auio.uio_vmspace = l->l_proc->p_vmspace;
513 
514 	for (i = 0, tiov = mp->msg_iov; i < mp->msg_iovlen; i++, tiov++) {
515 #if 0
516 		/* cannot happen; iov_len is unsigned */
517 		if (tiov->iov_len < 0) {
518 			error = EINVAL;
519 			goto bad;
520 		}
521 #endif
522 		/*
523 		 * Writes return ssize_t because -1 is returned on error.
524 		 * Therefore, we must restrict the length to SSIZE_MAX to
525 		 * avoid garbage return values.
526 		 */
527 		auio.uio_resid += tiov->iov_len;
528 		if (tiov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
529 			error = EINVAL;
530 			goto bad;
531 		}
532 	}
533 
534 	if (mp->msg_name && to == NULL) {
535 		error = sockargs(&to, mp->msg_name, mp->msg_namelen,
536 		    MT_SONAME);
537 		if (error)
538 			goto bad;
539 	}
540 
541 	if (mp->msg_control) {
542 		if (mp->msg_controllen < CMSG_ALIGN(sizeof(struct cmsghdr))) {
543 			error = EINVAL;
544 			goto bad;
545 		}
546 		if (control == NULL) {
547 			error = sockargs(&control, mp->msg_control,
548 			    mp->msg_controllen, MT_CONTROL);
549 			if (error)
550 				goto bad;
551 		}
552 	}
553 
554 	if (ktrpoint(KTR_GENIO)) {
555 		iovlen = auio.uio_iovcnt * sizeof(struct iovec);
556 		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
557 		memcpy(ktriov, auio.uio_iov, iovlen);
558 	}
559 
560 	if ((error = fd_getsock(s, &so)) != 0)
561 		goto bad;
562 
563 	if (mp->msg_name)
564 		MCLAIM(to, so->so_mowner);
565 	if (mp->msg_control)
566 		MCLAIM(control, so->so_mowner);
567 
568 	len = auio.uio_resid;
569 	error = (*so->so_send)(so, to, &auio, NULL, control, flags, l);
570 	/* Protocol is responsible for freeing 'control' */
571 	control = NULL;
572 
573 	fd_putfile(s);
574 
575 	if (error) {
576 		if (auio.uio_resid != len && (error == ERESTART ||
577 		    error == EINTR || error == EWOULDBLOCK))
578 			error = 0;
579 		if (error == EPIPE && (flags & MSG_NOSIGNAL) == 0) {
580 			mutex_enter(proc_lock);
581 			psignal(l->l_proc, SIGPIPE);
582 			mutex_exit(proc_lock);
583 		}
584 	}
585 	if (error == 0)
586 		*retsize = len - auio.uio_resid;
587 
588 bad:
589 	if (ktriov != NULL) {
590 		ktrgeniov(s, UIO_WRITE, ktriov, *retsize, error);
591 		free(ktriov, M_TEMP);
592 	}
593 
594  	if (iov != aiov)
595 		free(iov, M_IOV);
596 	if (to)
597 		m_freem(to);
598 	if (control)
599 		m_freem(control);
600 
601 	return (error);
602 }
603 
604 int
605 sys_recvfrom(struct lwp *l, const struct sys_recvfrom_args *uap, register_t *retval)
606 {
607 	/* {
608 		syscallarg(int)			s;
609 		syscallarg(void *)		buf;
610 		syscallarg(size_t)		len;
611 		syscallarg(int)			flags;
612 		syscallarg(struct sockaddr *)	from;
613 		syscallarg(unsigned int *)	fromlenaddr;
614 	} */
615 	struct msghdr	msg;
616 	struct iovec	aiov;
617 	int		error;
618 	struct mbuf	*from;
619 
620 	msg.msg_name = NULL;
621 	msg.msg_iov = &aiov;
622 	msg.msg_iovlen = 1;
623 	aiov.iov_base = SCARG(uap, buf);
624 	aiov.iov_len = SCARG(uap, len);
625 	msg.msg_control = NULL;
626 	msg.msg_flags = SCARG(uap, flags) & MSG_USERFLAGS;
627 
628 	error = do_sys_recvmsg(l, SCARG(uap, s), &msg, &from, NULL, retval);
629 	if (error != 0)
630 		return error;
631 
632 	error = copyout_sockname(SCARG(uap, from), SCARG(uap, fromlenaddr),
633 	    MSG_LENUSRSPACE, from);
634 	if (from != NULL)
635 		m_free(from);
636 	return error;
637 }
638 
639 int
640 sys_recvmsg(struct lwp *l, const struct sys_recvmsg_args *uap, register_t *retval)
641 {
642 	/* {
643 		syscallarg(int)			s;
644 		syscallarg(struct msghdr *)	msg;
645 		syscallarg(int)			flags;
646 	} */
647 	struct msghdr	msg;
648 	int		error;
649 	struct mbuf	*from, *control;
650 
651 	error = copyin(SCARG(uap, msg), &msg, sizeof(msg));
652 	if (error)
653 		return (error);
654 
655 	msg.msg_flags = (SCARG(uap, flags) & MSG_USERFLAGS) | MSG_IOVUSRSPACE;
656 
657 	error = do_sys_recvmsg(l, SCARG(uap, s), &msg, &from,
658 	    msg.msg_control != NULL ? &control : NULL, retval);
659 	if (error != 0)
660 		return error;
661 
662 	if (msg.msg_control != NULL)
663 		error = copyout_msg_control(l, &msg, control);
664 
665 	if (error == 0)
666 		error = copyout_sockname(msg.msg_name, &msg.msg_namelen, 0,
667 			from);
668 	if (from != NULL)
669 		m_free(from);
670 	if (error == 0) {
671 		ktrkuser("msghdr", &msg, sizeof msg);
672 		error = copyout(&msg, SCARG(uap, msg), sizeof(msg));
673 	}
674 
675 	return (error);
676 }
677 
678 /*
679  * Adjust for a truncated SCM_RIGHTS control message.
680  *  This means closing any file descriptors that aren't present
681  *  in the returned buffer.
682  *  m is the mbuf holding the (already externalized) SCM_RIGHTS message.
683  */
684 static void
685 free_rights(struct mbuf *m)
686 {
687 	int nfd;
688 	int i;
689 	int *fdv;
690 
691 	nfd = m->m_len < CMSG_SPACE(sizeof(int)) ? 0
692 	    : (m->m_len - CMSG_SPACE(sizeof(int))) / sizeof(int) + 1;
693 	fdv = (int *) CMSG_DATA(mtod(m,struct cmsghdr *));
694 	for (i = 0; i < nfd; i++) {
695 		if (fd_getfile(fdv[i]) != NULL)
696 			(void)fd_close(fdv[i]);
697 	}
698 }
699 
700 void
701 free_control_mbuf(struct lwp *l, struct mbuf *control, struct mbuf *uncopied)
702 {
703 	struct mbuf *next;
704 	struct cmsghdr *cmsg;
705 	bool do_free_rights = false;
706 
707 	while (control != NULL) {
708 		cmsg = mtod(control, struct cmsghdr *);
709 		if (control == uncopied)
710 			do_free_rights = true;
711 		if (do_free_rights && cmsg->cmsg_level == SOL_SOCKET
712 		    && cmsg->cmsg_type == SCM_RIGHTS)
713 			free_rights(control);
714 		next = control->m_next;
715 		m_free(control);
716 		control = next;
717 	}
718 }
719 
720 /* Copy socket control/CMSG data to user buffer, frees the mbuf */
721 int
722 copyout_msg_control(struct lwp *l, struct msghdr *mp, struct mbuf *control)
723 {
724 	int i, len, error = 0;
725 	struct cmsghdr *cmsg;
726 	struct mbuf *m;
727 	char *q;
728 
729 	len = mp->msg_controllen;
730 	if (len <= 0 || control == 0) {
731 		mp->msg_controllen = 0;
732 		free_control_mbuf(l, control, control);
733 		return 0;
734 	}
735 
736 	q = (char *)mp->msg_control;
737 
738 	for (m = control; m != NULL; ) {
739 		cmsg = mtod(m, struct cmsghdr *);
740 		i = m->m_len;
741 		if (len < i) {
742 			mp->msg_flags |= MSG_CTRUNC;
743 			if (cmsg->cmsg_level == SOL_SOCKET
744 			    && cmsg->cmsg_type == SCM_RIGHTS)
745 				/* Do not truncate me ... */
746 				break;
747 			i = len;
748 		}
749 		error = copyout(mtod(m, void *), q, i);
750 		ktrkuser("msgcontrol", mtod(m, void *), i);
751 		if (error != 0) {
752 			/* We must free all the SCM_RIGHTS */
753 			m = control;
754 			break;
755 		}
756 		m = m->m_next;
757 		if (m)
758 			i = ALIGN(i);
759 		q += i;
760 		len -= i;
761 		if (len <= 0)
762 			break;
763 	}
764 
765 	free_control_mbuf(l, control, m);
766 
767 	mp->msg_controllen = q - (char *)mp->msg_control;
768 	return error;
769 }
770 
771 int
772 do_sys_recvmsg(struct lwp *l, int s, struct msghdr *mp, struct mbuf **from,
773     struct mbuf **control, register_t *retsize)
774 {
775 	struct uio	auio;
776 	struct iovec	aiov[UIO_SMALLIOV], *iov = aiov;
777 	struct iovec	*tiov;
778 	int		i, len, error, iovlen;
779 	struct socket	*so;
780 	struct iovec	*ktriov;
781 
782 	ktrkuser("msghdr", mp, sizeof *mp);
783 
784 	*from = NULL;
785 	if (control != NULL)
786 		*control = NULL;
787 
788 	if ((error = fd_getsock(s, &so)) != 0)
789 		return (error);
790 
791 	if (mp->msg_flags & MSG_IOVUSRSPACE) {
792 		if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) {
793 			if ((unsigned int)mp->msg_iovlen > IOV_MAX) {
794 				error = EMSGSIZE;
795 				goto out;
796 			}
797 			iov = malloc(sizeof(struct iovec) * mp->msg_iovlen,
798 			    M_IOV, M_WAITOK);
799 		}
800 		if (mp->msg_iovlen != 0) {
801 			error = copyin(mp->msg_iov, iov,
802 			    (size_t)(mp->msg_iovlen * sizeof(struct iovec)));
803 			if (error)
804 				goto out;
805 		}
806 		auio.uio_iov = iov;
807 	} else
808 		auio.uio_iov = mp->msg_iov;
809 	auio.uio_iovcnt = mp->msg_iovlen;
810 	auio.uio_rw = UIO_READ;
811 	auio.uio_offset = 0;			/* XXX */
812 	auio.uio_resid = 0;
813 	KASSERT(l == curlwp);
814 	auio.uio_vmspace = l->l_proc->p_vmspace;
815 
816 	tiov = auio.uio_iov;
817 	for (i = 0; i < mp->msg_iovlen; i++, tiov++) {
818 #if 0
819 		/* cannot happen iov_len is unsigned */
820 		if (tiov->iov_len < 0) {
821 			error = EINVAL;
822 			goto out;
823 		}
824 #endif
825 		/*
826 		 * Reads return ssize_t because -1 is returned on error.
827 		 * Therefore we must restrict the length to SSIZE_MAX to
828 		 * avoid garbage return values.
829 		 */
830 		auio.uio_resid += tiov->iov_len;
831 		if (tiov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
832 			error = EINVAL;
833 			goto out;
834 		}
835 	}
836 
837 	ktriov = NULL;
838 	if (ktrpoint(KTR_GENIO)) {
839 		iovlen = auio.uio_iovcnt * sizeof(struct iovec);
840 		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
841 		memcpy(ktriov, auio.uio_iov, iovlen);
842 	}
843 
844 	len = auio.uio_resid;
845 	mp->msg_flags &= MSG_USERFLAGS;
846 	error = (*so->so_receive)(so, from, &auio, NULL, control,
847 	    &mp->msg_flags);
848 	len -= auio.uio_resid;
849 	*retsize = len;
850 	if (error != 0 && len != 0
851 	    && (error == ERESTART || error == EINTR || error == EWOULDBLOCK))
852 		/* Some data transferred */
853 		error = 0;
854 
855 	if (ktriov != NULL) {
856 		ktrgeniov(s, UIO_READ, ktriov, len, error);
857 		free(ktriov, M_TEMP);
858 	}
859 
860 	if (error != 0) {
861 		m_freem(*from);
862 		*from = NULL;
863 		if (control != NULL) {
864 			free_control_mbuf(l, *control, *control);
865 			*control = NULL;
866 		}
867 	}
868  out:
869 	if (iov != aiov)
870 		free(iov, M_TEMP);
871 	fd_putfile(s);
872 	return (error);
873 }
874 
875 
876 /* ARGSUSED */
877 int
878 sys_shutdown(struct lwp *l, const struct sys_shutdown_args *uap, register_t *retval)
879 {
880 	/* {
881 		syscallarg(int)	s;
882 		syscallarg(int)	how;
883 	} */
884 	struct socket	*so;
885 	int		error;
886 
887 	if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
888 		return (error);
889 	solock(so);
890 	error = soshutdown(so, SCARG(uap, how));
891 	sounlock(so);
892 	fd_putfile(SCARG(uap, s));
893 	return (error);
894 }
895 
896 /* ARGSUSED */
897 int
898 sys_setsockopt(struct lwp *l, const struct sys_setsockopt_args *uap, register_t *retval)
899 {
900 	/* {
901 		syscallarg(int)			s;
902 		syscallarg(int)			level;
903 		syscallarg(int)			name;
904 		syscallarg(const void *)	val;
905 		syscallarg(unsigned int)	valsize;
906 	} */
907 	struct sockopt	sopt;
908 	struct socket	*so;
909 	int		error;
910 	unsigned int	len;
911 
912 	len = SCARG(uap, valsize);
913 	if (len > 0 && SCARG(uap, val) == NULL)
914 		return (EINVAL);
915 
916 	if (len > MCLBYTES)
917 		return (EINVAL);
918 
919 	if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
920 		return (error);
921 
922 	sockopt_init(&sopt, SCARG(uap, level), SCARG(uap, name), len);
923 
924 	if (len > 0) {
925 		error = copyin(SCARG(uap, val), sopt.sopt_data, len);
926 		if (error)
927 			goto out;
928 	}
929 
930 	error = sosetopt(so, &sopt);
931 
932  out:
933 	sockopt_destroy(&sopt);
934  	fd_putfile(SCARG(uap, s));
935 	return (error);
936 }
937 
938 /* ARGSUSED */
939 int
940 sys_getsockopt(struct lwp *l, const struct sys_getsockopt_args *uap, register_t *retval)
941 {
942 	/* {
943 		syscallarg(int)			s;
944 		syscallarg(int)			level;
945 		syscallarg(int)			name;
946 		syscallarg(void *)		val;
947 		syscallarg(unsigned int *)	avalsize;
948 	} */
949 	struct sockopt	sopt;
950 	struct socket	*so;
951 	unsigned int	valsize, len;
952 	int		error;
953 
954 	if (SCARG(uap, val) != NULL) {
955 		error = copyin(SCARG(uap, avalsize), &valsize, sizeof(valsize));
956 		if (error)
957 			return (error);
958 	} else
959 		valsize = 0;
960 
961 	if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
962 		return (error);
963 
964 	sockopt_init(&sopt, SCARG(uap, level), SCARG(uap, name), 0);
965 
966 	error = sogetopt(so, &sopt);
967 	if (error)
968 		goto out;
969 
970 	if (valsize > 0) {
971 		len = min(valsize, sopt.sopt_size);
972 		error = copyout(sopt.sopt_data, SCARG(uap, val), len);
973 		if (error)
974 			goto out;
975 
976 		error = copyout(&len, SCARG(uap, avalsize), sizeof(len));
977 		if (error)
978 			goto out;
979 	}
980 
981  out:
982 	sockopt_destroy(&sopt);
983  	fd_putfile(SCARG(uap, s));
984 	return (error);
985 }
986 
#ifdef PIPE_SOCKETPAIR
/*
 * Pipe system call implemented on top of a pair of connected AF_LOCAL
 * stream sockets.
 *
 * The read end is returned in retval[0] (opened FREAD) and the write end
 * in retval[1] (opened FWRITE).  Both sockets are tagged SS_ISAPIPE.
 * The descriptors are only made visible once the two sockets have been
 * connected; on failure everything set up so far is undone in reverse
 * order.
 */
/* ARGSUSED */
int
sys_pipe(struct lwp *l, const void *v, register_t *retval)
{
	file_t		*rfp, *wfp;
	struct socket	*rsock, *wsock;
	int		newfd, rv;
	proc_t		*p;

	p = curproc;

	rv = socreate(AF_LOCAL, &rsock, SOCK_STREAM, 0, l, NULL);
	if (rv != 0)
		return rv;
	rv = socreate(AF_LOCAL, &wsock, SOCK_STREAM, 0, l, rsock);
	if (rv != 0)
		goto free1;

	/* remember this socket pair implements a pipe */
	wsock->so_state |= SS_ISAPIPE;
	rsock->so_state |= SS_ISAPIPE;

	rv = fd_allocfile(&rfp, &newfd);
	if (rv != 0)
		goto free2;
	retval[0] = newfd;
	rfp->f_flag = FREAD;
	rfp->f_type = DTYPE_SOCKET;
	rfp->f_ops = &socketops;
	rfp->f_data = rsock;

	rv = fd_allocfile(&wfp, &newfd);
	if (rv != 0)
		goto free3;
	wfp->f_flag = FWRITE;
	wfp->f_type = DTYPE_SOCKET;
	wfp->f_ops = &socketops;
	wfp->f_data = wsock;
	retval[1] = newfd;

	solock(wsock);
	rv = unp_connect2(wsock, rsock, PRU_CONNECT2);
	sounlock(wsock);
	if (rv != 0)
		goto free4;

	fd_affix(p, wfp, (int)retval[1]);
	fd_affix(p, rfp, (int)retval[0]);
	return 0;

 free4:
	fd_abort(p, wfp, (int)retval[1]);
 free3:
	fd_abort(p, rfp, (int)retval[0]);
 free2:
	(void)soclose(wsock);
 free1:
	(void)soclose(rsock);
	return rv;
}
#endif /* PIPE_SOCKETPAIR */
1038 
1039 /*
1040  * Get socket name.
1041  */
1042 /* ARGSUSED */
1043 int
1044 do_sys_getsockname(struct lwp *l, int fd, int which, struct mbuf **nam)
1045 {
1046 	struct socket	*so;
1047 	struct mbuf	*m;
1048 	int		error;
1049 
1050 	if ((error = fd_getsock(fd, &so)) != 0)
1051 		return error;
1052 
1053 	m = m_getclr(M_WAIT, MT_SONAME);
1054 	MCLAIM(m, so->so_mowner);
1055 
1056 	solock(so);
1057 	if (which == PRU_PEERADDR
1058 	    && (so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
1059 		error = ENOTCONN;
1060 	} else {
1061 		*nam = m;
1062 		error = (*so->so_proto->pr_usrreq)(so, which, NULL, m, NULL,
1063 		    NULL);
1064 	}
1065  	sounlock(so);
1066 	if (error != 0)
1067 		m_free(m);
1068  	fd_putfile(fd);
1069 	return error;
1070 }
1071 
1072 int
1073 copyout_sockname(struct sockaddr *asa, unsigned int *alen, int flags,
1074     struct mbuf *addr)
1075 {
1076 	int len;
1077 	int error;
1078 
1079 	if (asa == NULL)
1080 		/* Assume application not interested */
1081 		return 0;
1082 
1083 	if (flags & MSG_LENUSRSPACE) {
1084 		error = copyin(alen, &len, sizeof(len));
1085 		if (error)
1086 			return error;
1087 	} else
1088 		len = *alen;
1089 	if (len < 0)
1090 		return EINVAL;
1091 
1092 	if (addr == NULL) {
1093 		len = 0;
1094 		error = 0;
1095 	} else {
1096 		if (len > addr->m_len)
1097 			len = addr->m_len;
1098 		/* Maybe this ought to copy a chain ? */
1099 		ktrkuser("sockname", mtod(addr, void *), len);
1100 		error = copyout(mtod(addr, void *), asa, len);
1101 	}
1102 
1103 	if (error == 0) {
1104 		if (flags & MSG_LENUSRSPACE)
1105 			error = copyout(&len, alen, sizeof(len));
1106 		else
1107 			*alen = len;
1108 	}
1109 
1110 	return error;
1111 }
1112 
1113 /*
1114  * Get socket name.
1115  */
1116 /* ARGSUSED */
1117 int
1118 sys_getsockname(struct lwp *l, const struct sys_getsockname_args *uap, register_t *retval)
1119 {
1120 	/* {
1121 		syscallarg(int)			fdes;
1122 		syscallarg(struct sockaddr *)	asa;
1123 		syscallarg(unsigned int *)	alen;
1124 	} */
1125 	struct mbuf	*m;
1126 	int		error;
1127 
1128 	error = do_sys_getsockname(l, SCARG(uap, fdes), PRU_SOCKADDR, &m);
1129 	if (error != 0)
1130 		return error;
1131 
1132 	error = copyout_sockname(SCARG(uap, asa), SCARG(uap, alen),
1133 	    MSG_LENUSRSPACE, m);
1134 	if (m != NULL)
1135 		m_free(m);
1136 	return error;
1137 }
1138 
1139 /*
1140  * Get name of peer for connected socket.
1141  */
1142 /* ARGSUSED */
1143 int
1144 sys_getpeername(struct lwp *l, const struct sys_getpeername_args *uap, register_t *retval)
1145 {
1146 	/* {
1147 		syscallarg(int)			fdes;
1148 		syscallarg(struct sockaddr *)	asa;
1149 		syscallarg(unsigned int *)	alen;
1150 	} */
1151 	struct mbuf	*m;
1152 	int		error;
1153 
1154 	error = do_sys_getsockname(l, SCARG(uap, fdes), PRU_PEERADDR, &m);
1155 	if (error != 0)
1156 		return error;
1157 
1158 	error = copyout_sockname(SCARG(uap, asa), SCARG(uap, alen),
1159 	    MSG_LENUSRSPACE, m);
1160 	if (m != NULL)
1161 		m_free(m);
1162 	return error;
1163 }
1164 
1165 /*
1166  * XXX In a perfect world, we wouldn't pass around socket control
1167  * XXX arguments in mbufs, and this could go away.
1168  */
1169 int
1170 sockargs(struct mbuf **mp, const void *bf, size_t buflen, int type)
1171 {
1172 	struct sockaddr	*sa;
1173 	struct mbuf	*m;
1174 	int		error;
1175 
1176 	/*
1177 	 * We can't allow socket names > UCHAR_MAX in length, since that
1178 	 * will overflow sa_len.  Control data more than a page size in
1179 	 * length is just too much.
1180 	 */
1181 	if (buflen > (type == MT_SONAME ? UCHAR_MAX : PAGE_SIZE))
1182 		return (EINVAL);
1183 
1184 	/* Allocate an mbuf to hold the arguments. */
1185 	m = m_get(M_WAIT, type);
1186 	/* can't claim.  don't who to assign it to. */
1187 	if (buflen > MLEN) {
1188 		/*
1189 		 * Won't fit into a regular mbuf, so we allocate just
1190 		 * enough external storage to hold the argument.
1191 		 */
1192 		MEXTMALLOC(m, buflen, M_WAITOK);
1193 	}
1194 	m->m_len = buflen;
1195 	error = copyin(bf, mtod(m, void *), buflen);
1196 	if (error) {
1197 		(void) m_free(m);
1198 		return (error);
1199 	}
1200 	ktrkuser("sockargs", mtod(m, void *), buflen);
1201 	*mp = m;
1202 	if (type == MT_SONAME) {
1203 		sa = mtod(m, struct sockaddr *);
1204 #if BYTE_ORDER != BIG_ENDIAN
1205 		/*
1206 		 * 4.3BSD compat thing - need to stay, since bind(2),
1207 		 * connect(2), sendto(2) were not versioned for COMPAT_43.
1208 		 */
1209 		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1210 			sa->sa_family = sa->sa_len;
1211 #endif
1212 		sa->sa_len = buflen;
1213 	}
1214 	return (0);
1215 }
1216