xref: /netbsd-src/sys/kern/uipc_syscalls.c (revision a5847cc334d9a7029f6352b847e9e8d71a0f9e0c)
1 /*	$NetBSD: uipc_syscalls.c,v 1.148 2011/11/04 02:13:08 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Andrew Doran.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 1982, 1986, 1989, 1990, 1993
34  *	The Regents of the University of California.  All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	@(#)uipc_syscalls.c	8.6 (Berkeley) 2/14/95
61  */
62 
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: uipc_syscalls.c,v 1.148 2011/11/04 02:13:08 christos Exp $");
65 
66 #include "opt_pipe.h"
67 
68 #include <sys/param.h>
69 #include <sys/systm.h>
70 #include <sys/filedesc.h>
71 #include <sys/proc.h>
72 #include <sys/file.h>
73 #include <sys/buf.h>
74 #define MBUFTYPES
75 #include <sys/mbuf.h>
76 #include <sys/protosw.h>
77 #include <sys/socket.h>
78 #include <sys/socketvar.h>
79 #include <sys/signalvar.h>
80 #include <sys/un.h>
81 #include <sys/ktrace.h>
82 #include <sys/event.h>
83 #include <sys/kauth.h>
84 
85 #include <sys/mount.h>
86 #include <sys/syscallargs.h>
87 
88 /*
89  * System call interface to the socket abstraction.
90  */
91 extern const struct fileops socketops;
92 
93 int
94 sys___socket30(struct lwp *l, const struct sys___socket30_args *uap, register_t *retval)
95 {
96 	/* {
97 		syscallarg(int)	domain;
98 		syscallarg(int)	type;
99 		syscallarg(int)	protocol;
100 	} */
101 	int		fd, error;
102 
103 	error = fsocreate(SCARG(uap, domain), NULL, SCARG(uap, type),
104 			 SCARG(uap, protocol), l, &fd);
105 	if (error == 0)
106 		*retval = fd;
107 	return error;
108 }
109 
110 /* ARGSUSED */
111 int
112 sys_bind(struct lwp *l, const struct sys_bind_args *uap, register_t *retval)
113 {
114 	/* {
115 		syscallarg(int)				s;
116 		syscallarg(const struct sockaddr *)	name;
117 		syscallarg(unsigned int)		namelen;
118 	} */
119 	struct mbuf	*nam;
120 	int		error;
121 
122 	error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
123 	    MT_SONAME);
124 	if (error)
125 		return error;
126 
127 	return do_sys_bind(l, SCARG(uap, s), nam);
128 }
129 
130 int
131 do_sys_bind(struct lwp *l, int fd, struct mbuf *nam)
132 {
133 	struct socket	*so;
134 	int		error;
135 
136 	if ((error = fd_getsock(fd, &so)) != 0) {
137 		m_freem(nam);
138 		return (error);
139 	}
140 	MCLAIM(nam, so->so_mowner);
141 	error = sobind(so, nam, l);
142 	m_freem(nam);
143 	fd_putfile(fd);
144 	return error;
145 }
146 
147 /* ARGSUSED */
148 int
149 sys_listen(struct lwp *l, const struct sys_listen_args *uap, register_t *retval)
150 {
151 	/* {
152 		syscallarg(int)	s;
153 		syscallarg(int)	backlog;
154 	} */
155 	struct socket	*so;
156 	int		error;
157 
158 	if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
159 		return (error);
160 	error = solisten(so, SCARG(uap, backlog), l);
161 	fd_putfile(SCARG(uap, s));
162 	return error;
163 }
164 
165 int
166 do_sys_accept(struct lwp *l, int sock, struct mbuf **name, register_t *new_sock,
167     const sigset_t *mask, int flags, int clrflags)
168 {
169 	file_t		*fp, *fp2;
170 	struct mbuf	*nam;
171 	int		error, fd;
172 	struct socket	*so, *so2;
173 	short		wakeup_state = 0;
174 
175 	if ((fp = fd_getfile(sock)) == NULL)
176 		return (EBADF);
177 	if (fp->f_type != DTYPE_SOCKET) {
178 		fd_putfile(sock);
179 		return (ENOTSOCK);
180 	}
181 	if ((error = fd_allocfile(&fp2, &fd)) != 0) {
182 		fd_putfile(sock);
183 		return (error);
184 	}
185 	nam = m_get(M_WAIT, MT_SONAME);
186 	*new_sock = fd;
187 	so = fp->f_data;
188 	solock(so);
189 
190 	if (__predict_false(mask))
191 		sigsuspendsetup(l, mask);
192 
193 	if (!(so->so_proto->pr_flags & PR_LISTEN)) {
194 		error = EOPNOTSUPP;
195 		goto bad;
196 	}
197 	if ((so->so_options & SO_ACCEPTCONN) == 0) {
198 		error = EINVAL;
199 		goto bad;
200 	}
201 	if (so->so_nbio && so->so_qlen == 0) {
202 		error = EWOULDBLOCK;
203 		goto bad;
204 	}
205 	while (so->so_qlen == 0 && so->so_error == 0) {
206 		if (so->so_state & SS_CANTRCVMORE) {
207 			so->so_error = ECONNABORTED;
208 			break;
209 		}
210 		if (wakeup_state & SS_RESTARTSYS) {
211 			error = ERESTART;
212 			goto bad;
213 		}
214 		error = sowait(so, true, 0);
215 		if (error) {
216 			goto bad;
217 		}
218 		wakeup_state = so->so_state;
219 	}
220 	if (so->so_error) {
221 		error = so->so_error;
222 		so->so_error = 0;
223 		goto bad;
224 	}
225 	/* connection has been removed from the listen queue */
226 	KNOTE(&so->so_rcv.sb_sel.sel_klist, NOTE_SUBMIT);
227 	so2 = TAILQ_FIRST(&so->so_q);
228 	if (soqremque(so2, 1) == 0)
229 		panic("accept");
230 	fp2->f_type = DTYPE_SOCKET;
231 	fp2->f_flag = (fp->f_flag & ~clrflags) |
232 	    ((flags & SOCK_NONBLOCK) ? FNONBLOCK : 0);
233 	fp2->f_ops = &socketops;
234 	fp2->f_data = so2;
235 	error = soaccept(so2, nam);
236 	so2->so_cred = kauth_cred_dup(so->so_cred);
237 	sounlock(so);
238 	if (error) {
239 		/* an error occurred, free the file descriptor and mbuf */
240 		m_freem(nam);
241 		mutex_enter(&fp2->f_lock);
242 		fp2->f_count++;
243 		mutex_exit(&fp2->f_lock);
244 		closef(fp2);
245 		fd_abort(curproc, NULL, fd);
246 	} else {
247 		fd_set_exclose(l, fd, (flags & SOCK_CLOEXEC) != 0);
248 		fd_affix(curproc, fp2, fd);
249 		*name = nam;
250 	}
251 	fd_putfile(sock);
252 	if (__predict_false(mask))
253 		sigsuspendteardown(l);
254 	return (error);
255  bad:
256  	sounlock(so);
257  	m_freem(nam);
258 	fd_putfile(sock);
259  	fd_abort(curproc, fp2, fd);
260 	if (__predict_false(mask))
261 		sigsuspendteardown(l);
262  	return (error);
263 }
264 
265 int
266 sys_accept(struct lwp *l, const struct sys_accept_args *uap, register_t *retval)
267 {
268 	/* {
269 		syscallarg(int)			s;
270 		syscallarg(struct sockaddr *)	name;
271 		syscallarg(unsigned int *)	anamelen;
272 	} */
273 	int error, fd;
274 	struct mbuf *name;
275 
276 	error = do_sys_accept(l, SCARG(uap, s), &name, retval, NULL, 0, 0);
277 	if (error != 0)
278 		return error;
279 	error = copyout_sockname(SCARG(uap, name), SCARG(uap, anamelen),
280 	    MSG_LENUSRSPACE, name);
281 	if (name != NULL)
282 		m_free(name);
283 	if (error != 0) {
284 		fd = (int)*retval;
285 		if (fd_getfile(fd) != NULL)
286 			(void)fd_close(fd);
287 	}
288 	return error;
289 }
290 
291 int
292 sys_paccept(struct lwp *l, const struct sys_paccept_args *uap,
293     register_t *retval)
294 {
295 	/* {
296 		syscallarg(int)			s;
297 		syscallarg(struct sockaddr *)	name;
298 		syscallarg(unsigned int *)	anamelen;
299 		syscallarg(const sigset_t *)	mask;
300 		syscallarg(int)			flags;
301 	} */
302 	int error, fd;
303 	struct mbuf *name;
304 	sigset_t *mask, amask;
305 
306 	if (SCARG(uap, mask) != NULL) {
307 		error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
308 		if (error)
309 			return error;
310 		mask = &amask;
311 	} else
312 		mask = NULL;
313 
314 	error = do_sys_accept(l, SCARG(uap, s), &name, retval, mask,
315 	    SCARG(uap, flags), FNONBLOCK);
316 	if (error != 0)
317 		return error;
318 	error = copyout_sockname(SCARG(uap, name), SCARG(uap, anamelen),
319 	    MSG_LENUSRSPACE, name);
320 	if (name != NULL)
321 		m_free(name);
322 	if (error != 0) {
323 		fd = (int)*retval;
324 		if (fd_getfile(fd) != NULL)
325 			(void)fd_close(fd);
326 	}
327 	return error;
328 }
329 
330 /* ARGSUSED */
331 int
332 sys_connect(struct lwp *l, const struct sys_connect_args *uap, register_t *retval)
333 {
334 	/* {
335 		syscallarg(int)				s;
336 		syscallarg(const struct sockaddr *)	name;
337 		syscallarg(unsigned int)		namelen;
338 	} */
339 	int		error;
340 	struct mbuf	*nam;
341 
342 	error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
343 	    MT_SONAME);
344 	if (error)
345 		return error;
346 	return do_sys_connect(l,  SCARG(uap, s), nam);
347 }
348 
349 int
350 do_sys_connect(struct lwp *l, int fd, struct mbuf *nam)
351 {
352 	struct socket	*so;
353 	int		error;
354 	int		interrupted = 0;
355 
356 	if ((error = fd_getsock(fd, &so)) != 0) {
357 		m_freem(nam);
358 		return (error);
359 	}
360 	solock(so);
361 	MCLAIM(nam, so->so_mowner);
362 	if ((so->so_state & SS_ISCONNECTING) != 0) {
363 		error = EALREADY;
364 		goto out;
365 	}
366 
367 	error = soconnect(so, nam, l);
368 	if (error)
369 		goto bad;
370 	if (so->so_nbio && (so->so_state & SS_ISCONNECTING) != 0) {
371 		error = EINPROGRESS;
372 		goto out;
373 	}
374 	while ((so->so_state & SS_ISCONNECTING) != 0 && so->so_error == 0) {
375 		error = sowait(so, true, 0);
376 		if (__predict_false((so->so_state & SS_ISABORTING) != 0)) {
377 			error = EPIPE;
378 			interrupted = 1;
379 			break;
380 		}
381 		if (error) {
382 			if (error == EINTR || error == ERESTART)
383 				interrupted = 1;
384 			break;
385 		}
386 	}
387 	if (error == 0) {
388 		error = so->so_error;
389 		so->so_error = 0;
390 	}
391  bad:
392 	if (!interrupted)
393 		so->so_state &= ~SS_ISCONNECTING;
394 	if (error == ERESTART)
395 		error = EINTR;
396  out:
397  	sounlock(so);
398  	fd_putfile(fd);
399 	m_freem(nam);
400 	return (error);
401 }
402 
403 static int
404 makesocket(struct lwp *l, file_t **fp, int *fd, int flags, int type,
405     int domain, int proto, struct socket *soo)
406 {
407 	int error;
408 	struct socket *so;
409 	int fnonblock = (flags & SOCK_NONBLOCK) ? FNONBLOCK : 0;
410 
411 	if ((error = socreate(domain, &so, type, proto, l, soo)) != 0)
412 		return error;
413 
414 	if ((error = fd_allocfile(fp, fd)) != 0) {
415 		soclose(so);
416 		return error;
417 	}
418 	fd_set_exclose(l, *fd, (flags & SOCK_CLOEXEC) != 0);
419 	(*fp)->f_flag = FREAD|FWRITE|fnonblock;
420 	(*fp)->f_type = DTYPE_SOCKET;
421 	(*fp)->f_ops = &socketops;
422 	(*fp)->f_data = so;
423 	return 0;
424 }
425 
426 int
427 sys_socketpair(struct lwp *l, const struct sys_socketpair_args *uap,
428     register_t *retval)
429 {
430 	/* {
431 		syscallarg(int)		domain;
432 		syscallarg(int)		type;
433 		syscallarg(int)		protocol;
434 		syscallarg(int *)	rsv;
435 	} */
436 	file_t		*fp1, *fp2;
437 	struct socket	*so1, *so2;
438 	int		fd, error, sv[2];
439 	proc_t		*p;
440 	int		flags = SCARG(uap, type) & SOCK_FLAGS_MASK;
441 	int		type = SCARG(uap, type) & ~SOCK_FLAGS_MASK;
442 	int		domain = SCARG(uap, domain);
443 	int		proto = SCARG(uap, protocol);
444 
445 	p = curproc;
446 
447 	error = makesocket(l, &fp1, &fd, flags, type, domain, proto, NULL);
448 	if (error)
449 		return error;
450 	so1 = fp1->f_data;
451 	sv[0] = fd;
452 
453 	error = makesocket(l, &fp2, &fd, flags, type, domain, proto, so1);
454 	if (error)
455 		goto out;
456 	so2 = fp2->f_data;
457 	sv[1] = fd;
458 
459 	solock(so1);
460 	error = soconnect2(so1, so2);
461 	if (error == 0 && type == SOCK_DGRAM) {
462 		/*
463 		 * Datagram socket connection is asymmetric.
464 		 */
465 		error = soconnect2(so2, so1);
466 	}
467 	sounlock(so1);
468 
469 	if (error == 0)
470 		error = copyout(sv, SCARG(uap, rsv), sizeof(sv));
471 	if (error == 0) {
472 		fd_affix(p, fp2, sv[1]);
473 		fd_affix(p, fp1, sv[0]);
474 		return 0;
475 	}
476 	fd_abort(p, fp2, sv[1]);
477 	(void)soclose(so2);
478 out:
479 	fd_abort(p, fp1, sv[0]);
480 	(void)soclose(so1);
481 	return error;
482 }
483 
484 int
485 sys_sendto(struct lwp *l, const struct sys_sendto_args *uap, register_t *retval)
486 {
487 	/* {
488 		syscallarg(int)				s;
489 		syscallarg(const void *)		buf;
490 		syscallarg(size_t)			len;
491 		syscallarg(int)				flags;
492 		syscallarg(const struct sockaddr *)	to;
493 		syscallarg(unsigned int)		tolen;
494 	} */
495 	struct msghdr	msg;
496 	struct iovec	aiov;
497 
498 	msg.msg_name = __UNCONST(SCARG(uap, to)); /* XXXUNCONST kills const */
499 	msg.msg_namelen = SCARG(uap, tolen);
500 	msg.msg_iov = &aiov;
501 	msg.msg_iovlen = 1;
502 	msg.msg_control = NULL;
503 	msg.msg_flags = 0;
504 	aiov.iov_base = __UNCONST(SCARG(uap, buf)); /* XXXUNCONST kills const */
505 	aiov.iov_len = SCARG(uap, len);
506 	return do_sys_sendmsg(l, SCARG(uap, s), &msg, SCARG(uap, flags), retval);
507 }
508 
509 int
510 sys_sendmsg(struct lwp *l, const struct sys_sendmsg_args *uap, register_t *retval)
511 {
512 	/* {
513 		syscallarg(int)				s;
514 		syscallarg(const struct msghdr *)	msg;
515 		syscallarg(int)				flags;
516 	} */
517 	struct msghdr	msg;
518 	int		error;
519 
520 	error = copyin(SCARG(uap, msg), &msg, sizeof(msg));
521 	if (error)
522 		return (error);
523 
524 	msg.msg_flags = MSG_IOVUSRSPACE;
525 	return do_sys_sendmsg(l, SCARG(uap, s), &msg, SCARG(uap, flags), retval);
526 }
527 
528 int
529 do_sys_sendmsg(struct lwp *l, int s, struct msghdr *mp, int flags,
530 		register_t *retsize)
531 {
532 	struct iovec	aiov[UIO_SMALLIOV], *iov = aiov, *tiov, *ktriov = NULL;
533 	struct mbuf	*to, *control;
534 	struct socket	*so;
535 	struct uio	auio;
536 	size_t		len, iovsz;
537 	int		i, error;
538 
539 	ktrkuser("msghdr", mp, sizeof *mp);
540 
541 	/* If the caller passed us stuff in mbufs, we must free them. */
542 	to = (mp->msg_flags & MSG_NAMEMBUF) ? mp->msg_name : NULL;
543 	control = (mp->msg_flags & MSG_CONTROLMBUF) ? mp->msg_control : NULL;
544 	iovsz = mp->msg_iovlen * sizeof(struct iovec);
545 
546 	if (mp->msg_flags & MSG_IOVUSRSPACE) {
547 		if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) {
548 			if ((unsigned int)mp->msg_iovlen > IOV_MAX) {
549 				error = EMSGSIZE;
550 				goto bad;
551 			}
552 			iov = kmem_alloc(iovsz, KM_SLEEP);
553 		}
554 		if (mp->msg_iovlen != 0) {
555 			error = copyin(mp->msg_iov, iov, iovsz);
556 			if (error)
557 				goto bad;
558 		}
559 		mp->msg_iov = iov;
560 	}
561 
562 	auio.uio_iov = mp->msg_iov;
563 	auio.uio_iovcnt = mp->msg_iovlen;
564 	auio.uio_rw = UIO_WRITE;
565 	auio.uio_offset = 0;			/* XXX */
566 	auio.uio_resid = 0;
567 	KASSERT(l == curlwp);
568 	auio.uio_vmspace = l->l_proc->p_vmspace;
569 
570 	for (i = 0, tiov = mp->msg_iov; i < mp->msg_iovlen; i++, tiov++) {
571 		/*
572 		 * Writes return ssize_t because -1 is returned on error.
573 		 * Therefore, we must restrict the length to SSIZE_MAX to
574 		 * avoid garbage return values.
575 		 */
576 		auio.uio_resid += tiov->iov_len;
577 		if (tiov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
578 			error = EINVAL;
579 			goto bad;
580 		}
581 	}
582 
583 	if (mp->msg_name && to == NULL) {
584 		error = sockargs(&to, mp->msg_name, mp->msg_namelen,
585 		    MT_SONAME);
586 		if (error)
587 			goto bad;
588 	}
589 
590 	if (mp->msg_control) {
591 		if (mp->msg_controllen < CMSG_ALIGN(sizeof(struct cmsghdr))) {
592 			error = EINVAL;
593 			goto bad;
594 		}
595 		if (control == NULL) {
596 			error = sockargs(&control, mp->msg_control,
597 			    mp->msg_controllen, MT_CONTROL);
598 			if (error)
599 				goto bad;
600 		}
601 	}
602 
603 	if (ktrpoint(KTR_GENIO)) {
604 		ktriov = kmem_alloc(iovsz, KM_SLEEP);
605 		memcpy(ktriov, auio.uio_iov, iovsz);
606 	}
607 
608 	if ((error = fd_getsock(s, &so)) != 0)
609 		goto bad;
610 
611 	if (mp->msg_name)
612 		MCLAIM(to, so->so_mowner);
613 	if (mp->msg_control)
614 		MCLAIM(control, so->so_mowner);
615 
616 	len = auio.uio_resid;
617 	error = (*so->so_send)(so, to, &auio, NULL, control, flags, l);
618 	/* Protocol is responsible for freeing 'control' */
619 	control = NULL;
620 
621 	fd_putfile(s);
622 
623 	if (error) {
624 		if (auio.uio_resid != len && (error == ERESTART ||
625 		    error == EINTR || error == EWOULDBLOCK))
626 			error = 0;
627 		if (error == EPIPE && (flags & MSG_NOSIGNAL) == 0) {
628 			mutex_enter(proc_lock);
629 			psignal(l->l_proc, SIGPIPE);
630 			mutex_exit(proc_lock);
631 		}
632 	}
633 	if (error == 0)
634 		*retsize = len - auio.uio_resid;
635 
636 bad:
637 	if (ktriov != NULL) {
638 		ktrgeniov(s, UIO_WRITE, ktriov, *retsize, error);
639 		kmem_free(ktriov, iovsz);
640 	}
641 
642  	if (iov != aiov)
643  		kmem_free(iov, iovsz);
644 	if (to)
645 		m_freem(to);
646 	if (control)
647 		m_freem(control);
648 
649 	return (error);
650 }
651 
652 int
653 sys_recvfrom(struct lwp *l, const struct sys_recvfrom_args *uap, register_t *retval)
654 {
655 	/* {
656 		syscallarg(int)			s;
657 		syscallarg(void *)		buf;
658 		syscallarg(size_t)		len;
659 		syscallarg(int)			flags;
660 		syscallarg(struct sockaddr *)	from;
661 		syscallarg(unsigned int *)	fromlenaddr;
662 	} */
663 	struct msghdr	msg;
664 	struct iovec	aiov;
665 	int		error;
666 	struct mbuf	*from;
667 
668 	msg.msg_name = NULL;
669 	msg.msg_iov = &aiov;
670 	msg.msg_iovlen = 1;
671 	aiov.iov_base = SCARG(uap, buf);
672 	aiov.iov_len = SCARG(uap, len);
673 	msg.msg_control = NULL;
674 	msg.msg_flags = SCARG(uap, flags) & MSG_USERFLAGS;
675 
676 	error = do_sys_recvmsg(l, SCARG(uap, s), &msg, &from, NULL, retval);
677 	if (error != 0)
678 		return error;
679 
680 	error = copyout_sockname(SCARG(uap, from), SCARG(uap, fromlenaddr),
681 	    MSG_LENUSRSPACE, from);
682 	if (from != NULL)
683 		m_free(from);
684 	return error;
685 }
686 
687 int
688 sys_recvmsg(struct lwp *l, const struct sys_recvmsg_args *uap, register_t *retval)
689 {
690 	/* {
691 		syscallarg(int)			s;
692 		syscallarg(struct msghdr *)	msg;
693 		syscallarg(int)			flags;
694 	} */
695 	struct msghdr	msg;
696 	int		error;
697 	struct mbuf	*from, *control;
698 
699 	error = copyin(SCARG(uap, msg), &msg, sizeof(msg));
700 	if (error)
701 		return (error);
702 
703 	msg.msg_flags = (SCARG(uap, flags) & MSG_USERFLAGS) | MSG_IOVUSRSPACE;
704 
705 	error = do_sys_recvmsg(l, SCARG(uap, s), &msg, &from,
706 	    msg.msg_control != NULL ? &control : NULL, retval);
707 	if (error != 0)
708 		return error;
709 
710 	if (msg.msg_control != NULL)
711 		error = copyout_msg_control(l, &msg, control);
712 
713 	if (error == 0)
714 		error = copyout_sockname(msg.msg_name, &msg.msg_namelen, 0,
715 			from);
716 	if (from != NULL)
717 		m_free(from);
718 	if (error == 0) {
719 		ktrkuser("msghdr", &msg, sizeof msg);
720 		error = copyout(&msg, SCARG(uap, msg), sizeof(msg));
721 	}
722 
723 	return (error);
724 }
725 
726 /*
727  * Adjust for a truncated SCM_RIGHTS control message.
728  *  This means closing any file descriptors that aren't present
729  *  in the returned buffer.
730  *  m is the mbuf holding the (already externalized) SCM_RIGHTS message.
731  */
732 static void
733 free_rights(struct mbuf *m)
734 {
735 	int nfd;
736 	int i;
737 	int *fdv;
738 
739 	nfd = m->m_len < CMSG_SPACE(sizeof(int)) ? 0
740 	    : (m->m_len - CMSG_SPACE(sizeof(int))) / sizeof(int) + 1;
741 	fdv = (int *) CMSG_DATA(mtod(m,struct cmsghdr *));
742 	for (i = 0; i < nfd; i++) {
743 		if (fd_getfile(fdv[i]) != NULL)
744 			(void)fd_close(fdv[i]);
745 	}
746 }
747 
748 void
749 free_control_mbuf(struct lwp *l, struct mbuf *control, struct mbuf *uncopied)
750 {
751 	struct mbuf *next;
752 	struct cmsghdr *cmsg;
753 	bool do_free_rights = false;
754 
755 	while (control != NULL) {
756 		cmsg = mtod(control, struct cmsghdr *);
757 		if (control == uncopied)
758 			do_free_rights = true;
759 		if (do_free_rights && cmsg->cmsg_level == SOL_SOCKET
760 		    && cmsg->cmsg_type == SCM_RIGHTS)
761 			free_rights(control);
762 		next = control->m_next;
763 		m_free(control);
764 		control = next;
765 	}
766 }
767 
768 /* Copy socket control/CMSG data to user buffer, frees the mbuf */
769 int
770 copyout_msg_control(struct lwp *l, struct msghdr *mp, struct mbuf *control)
771 {
772 	int i, len, error = 0;
773 	struct cmsghdr *cmsg;
774 	struct mbuf *m;
775 	char *q;
776 
777 	len = mp->msg_controllen;
778 	if (len <= 0 || control == 0) {
779 		mp->msg_controllen = 0;
780 		free_control_mbuf(l, control, control);
781 		return 0;
782 	}
783 
784 	q = (char *)mp->msg_control;
785 
786 	for (m = control; m != NULL; ) {
787 		cmsg = mtod(m, struct cmsghdr *);
788 		i = m->m_len;
789 		if (len < i) {
790 			mp->msg_flags |= MSG_CTRUNC;
791 			if (cmsg->cmsg_level == SOL_SOCKET
792 			    && cmsg->cmsg_type == SCM_RIGHTS)
793 				/* Do not truncate me ... */
794 				break;
795 			i = len;
796 		}
797 		error = copyout(mtod(m, void *), q, i);
798 		ktrkuser("msgcontrol", mtod(m, void *), i);
799 		if (error != 0) {
800 			/* We must free all the SCM_RIGHTS */
801 			m = control;
802 			break;
803 		}
804 		m = m->m_next;
805 		if (m)
806 			i = ALIGN(i);
807 		q += i;
808 		len -= i;
809 		if (len <= 0)
810 			break;
811 	}
812 
813 	free_control_mbuf(l, control, m);
814 
815 	mp->msg_controllen = q - (char *)mp->msg_control;
816 	return error;
817 }
818 
819 int
820 do_sys_recvmsg(struct lwp *l, int s, struct msghdr *mp, struct mbuf **from,
821     struct mbuf **control, register_t *retsize)
822 {
823 	struct iovec	aiov[UIO_SMALLIOV], *iov = aiov, *tiov, *ktriov;
824 	struct socket	*so;
825 	struct uio	auio;
826 	size_t		len, iovsz;
827 	int		i, error;
828 
829 	ktrkuser("msghdr", mp, sizeof *mp);
830 
831 	*from = NULL;
832 	if (control != NULL)
833 		*control = NULL;
834 
835 	if ((error = fd_getsock(s, &so)) != 0)
836 		return (error);
837 
838 	iovsz = mp->msg_iovlen * sizeof(struct iovec);
839 
840 	if (mp->msg_flags & MSG_IOVUSRSPACE) {
841 		if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) {
842 			if ((unsigned int)mp->msg_iovlen > IOV_MAX) {
843 				error = EMSGSIZE;
844 				goto out;
845 			}
846 			iov = kmem_alloc(iovsz, KM_SLEEP);
847 		}
848 		if (mp->msg_iovlen != 0) {
849 			error = copyin(mp->msg_iov, iov, iovsz);
850 			if (error)
851 				goto out;
852 		}
853 		auio.uio_iov = iov;
854 	} else
855 		auio.uio_iov = mp->msg_iov;
856 	auio.uio_iovcnt = mp->msg_iovlen;
857 	auio.uio_rw = UIO_READ;
858 	auio.uio_offset = 0;			/* XXX */
859 	auio.uio_resid = 0;
860 	KASSERT(l == curlwp);
861 	auio.uio_vmspace = l->l_proc->p_vmspace;
862 
863 	tiov = auio.uio_iov;
864 	for (i = 0; i < mp->msg_iovlen; i++, tiov++) {
865 		/*
866 		 * Reads return ssize_t because -1 is returned on error.
867 		 * Therefore we must restrict the length to SSIZE_MAX to
868 		 * avoid garbage return values.
869 		 */
870 		auio.uio_resid += tiov->iov_len;
871 		if (tiov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
872 			error = EINVAL;
873 			goto out;
874 		}
875 	}
876 
877 	ktriov = NULL;
878 	if (ktrpoint(KTR_GENIO)) {
879 		ktriov = kmem_alloc(iovsz, KM_SLEEP);
880 		memcpy(ktriov, auio.uio_iov, iovsz);
881 	}
882 
883 	len = auio.uio_resid;
884 	mp->msg_flags &= MSG_USERFLAGS;
885 	error = (*so->so_receive)(so, from, &auio, NULL, control,
886 	    &mp->msg_flags);
887 	len -= auio.uio_resid;
888 	*retsize = len;
889 	if (error != 0 && len != 0
890 	    && (error == ERESTART || error == EINTR || error == EWOULDBLOCK))
891 		/* Some data transferred */
892 		error = 0;
893 
894 	if (ktriov != NULL) {
895 		ktrgeniov(s, UIO_READ, ktriov, len, error);
896 		kmem_free(ktriov, iovsz);
897 	}
898 
899 	if (error != 0) {
900 		m_freem(*from);
901 		*from = NULL;
902 		if (control != NULL) {
903 			free_control_mbuf(l, *control, *control);
904 			*control = NULL;
905 		}
906 	}
907  out:
908 	if (iov != aiov)
909 		kmem_free(iov, iovsz);
910 	fd_putfile(s);
911 	return (error);
912 }
913 
914 
915 /* ARGSUSED */
916 int
917 sys_shutdown(struct lwp *l, const struct sys_shutdown_args *uap, register_t *retval)
918 {
919 	/* {
920 		syscallarg(int)	s;
921 		syscallarg(int)	how;
922 	} */
923 	struct socket	*so;
924 	int		error;
925 
926 	if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
927 		return (error);
928 	solock(so);
929 	error = soshutdown(so, SCARG(uap, how));
930 	sounlock(so);
931 	fd_putfile(SCARG(uap, s));
932 	return (error);
933 }
934 
935 /* ARGSUSED */
936 int
937 sys_setsockopt(struct lwp *l, const struct sys_setsockopt_args *uap, register_t *retval)
938 {
939 	/* {
940 		syscallarg(int)			s;
941 		syscallarg(int)			level;
942 		syscallarg(int)			name;
943 		syscallarg(const void *)	val;
944 		syscallarg(unsigned int)	valsize;
945 	} */
946 	struct sockopt	sopt;
947 	struct socket	*so;
948 	int		error;
949 	unsigned int	len;
950 
951 	len = SCARG(uap, valsize);
952 	if (len > 0 && SCARG(uap, val) == NULL)
953 		return (EINVAL);
954 
955 	if (len > MCLBYTES)
956 		return (EINVAL);
957 
958 	if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
959 		return (error);
960 
961 	sockopt_init(&sopt, SCARG(uap, level), SCARG(uap, name), len);
962 
963 	if (len > 0) {
964 		error = copyin(SCARG(uap, val), sopt.sopt_data, len);
965 		if (error)
966 			goto out;
967 	}
968 
969 	error = sosetopt(so, &sopt);
970 
971  out:
972 	sockopt_destroy(&sopt);
973  	fd_putfile(SCARG(uap, s));
974 	return (error);
975 }
976 
977 /* ARGSUSED */
978 int
979 sys_getsockopt(struct lwp *l, const struct sys_getsockopt_args *uap, register_t *retval)
980 {
981 	/* {
982 		syscallarg(int)			s;
983 		syscallarg(int)			level;
984 		syscallarg(int)			name;
985 		syscallarg(void *)		val;
986 		syscallarg(unsigned int *)	avalsize;
987 	} */
988 	struct sockopt	sopt;
989 	struct socket	*so;
990 	unsigned int	valsize, len;
991 	int		error;
992 
993 	if (SCARG(uap, val) != NULL) {
994 		error = copyin(SCARG(uap, avalsize), &valsize, sizeof(valsize));
995 		if (error)
996 			return (error);
997 	} else
998 		valsize = 0;
999 
1000 	if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
1001 		return (error);
1002 
1003 	sockopt_init(&sopt, SCARG(uap, level), SCARG(uap, name), 0);
1004 
1005 	error = sogetopt(so, &sopt);
1006 	if (error)
1007 		goto out;
1008 
1009 	if (valsize > 0) {
1010 		len = min(valsize, sopt.sopt_size);
1011 		error = copyout(sopt.sopt_data, SCARG(uap, val), len);
1012 		if (error)
1013 			goto out;
1014 
1015 		error = copyout(&len, SCARG(uap, avalsize), sizeof(len));
1016 		if (error)
1017 			goto out;
1018 	}
1019 
1020  out:
1021 	sockopt_destroy(&sopt);
1022  	fd_putfile(SCARG(uap, s));
1023 	return (error);
1024 }
1025 
1026 #ifdef PIPE_SOCKETPAIR
1027 /* ARGSUSED */
1028 int
1029 pipe1(struct lwp *l, register_t *retval, int flags)
1030 {
1031 	file_t		*rf, *wf;
1032 	struct socket	*rso, *wso;
1033 	int		fd, error;
1034 	proc_t		*p;
1035 
1036 	if (flags & ~(O_CLOEXEC|O_NONBLOCK))
1037 		return EINVAL;
1038 	p = curproc;
1039 	if ((error = socreate(AF_LOCAL, &rso, SOCK_STREAM, 0, l, NULL)) != 0)
1040 		return (error);
1041 	if ((error = socreate(AF_LOCAL, &wso, SOCK_STREAM, 0, l, rso)) != 0)
1042 		goto free1;
1043 	/* remember this socket pair implements a pipe */
1044 	wso->so_state |= SS_ISAPIPE;
1045 	rso->so_state |= SS_ISAPIPE;
1046 	if ((error = fd_allocfile(&rf, &fd)) != 0)
1047 		goto free2;
1048 	retval[0] = fd;
1049 	rf->f_flag = FREAD | flags;
1050 	rf->f_type = DTYPE_SOCKET;
1051 	rf->f_ops = &socketops;
1052 	rf->f_data = rso;
1053 	if ((error = fd_allocfile(&wf, &fd)) != 0)
1054 		goto free3;
1055 	wf->f_flag = FWRITE | flags;
1056 	wf->f_type = DTYPE_SOCKET;
1057 	wf->f_ops = &socketops;
1058 	wf->f_data = wso;
1059 	retval[1] = fd;
1060 	solock(wso);
1061 	error = unp_connect2(wso, rso, PRU_CONNECT2);
1062 	sounlock(wso);
1063 	if (error != 0)
1064 		goto free4;
1065 	fd_affix(p, wf, (int)retval[1]);
1066 	fd_affix(p, rf, (int)retval[0]);
1067 	return (0);
1068  free4:
1069 	fd_abort(p, wf, (int)retval[1]);
1070  free3:
1071 	fd_abort(p, rf, (int)retval[0]);
1072  free2:
1073 	(void)soclose(wso);
1074  free1:
1075 	(void)soclose(rso);
1076 	return (error);
1077 }
1078 #endif /* PIPE_SOCKETPAIR */
1079 
1080 /*
1081  * Get socket name.
1082  */
1083 /* ARGSUSED */
1084 int
1085 do_sys_getsockname(struct lwp *l, int fd, int which, struct mbuf **nam)
1086 {
1087 	struct socket	*so;
1088 	struct mbuf	*m;
1089 	int		error;
1090 
1091 	if ((error = fd_getsock(fd, &so)) != 0)
1092 		return error;
1093 
1094 	m = m_getclr(M_WAIT, MT_SONAME);
1095 	MCLAIM(m, so->so_mowner);
1096 
1097 	solock(so);
1098 	if (which == PRU_PEERADDR
1099 	    && (so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
1100 		error = ENOTCONN;
1101 	} else {
1102 		*nam = m;
1103 		error = (*so->so_proto->pr_usrreq)(so, which, NULL, m, NULL,
1104 		    NULL);
1105 	}
1106  	sounlock(so);
1107 	if (error != 0)
1108 		m_free(m);
1109  	fd_putfile(fd);
1110 	return error;
1111 }
1112 
1113 int
1114 copyout_sockname(struct sockaddr *asa, unsigned int *alen, int flags,
1115     struct mbuf *addr)
1116 {
1117 	int len;
1118 	int error;
1119 
1120 	if (asa == NULL)
1121 		/* Assume application not interested */
1122 		return 0;
1123 
1124 	if (flags & MSG_LENUSRSPACE) {
1125 		error = copyin(alen, &len, sizeof(len));
1126 		if (error)
1127 			return error;
1128 	} else
1129 		len = *alen;
1130 	if (len < 0)
1131 		return EINVAL;
1132 
1133 	if (addr == NULL) {
1134 		len = 0;
1135 		error = 0;
1136 	} else {
1137 		if (len > addr->m_len)
1138 			len = addr->m_len;
1139 		/* Maybe this ought to copy a chain ? */
1140 		ktrkuser("sockname", mtod(addr, void *), len);
1141 		error = copyout(mtod(addr, void *), asa, len);
1142 	}
1143 
1144 	if (error == 0) {
1145 		if (flags & MSG_LENUSRSPACE)
1146 			error = copyout(&len, alen, sizeof(len));
1147 		else
1148 			*alen = len;
1149 	}
1150 
1151 	return error;
1152 }
1153 
1154 /*
1155  * Get socket name.
1156  */
1157 /* ARGSUSED */
1158 int
1159 sys_getsockname(struct lwp *l, const struct sys_getsockname_args *uap, register_t *retval)
1160 {
1161 	/* {
1162 		syscallarg(int)			fdes;
1163 		syscallarg(struct sockaddr *)	asa;
1164 		syscallarg(unsigned int *)	alen;
1165 	} */
1166 	struct mbuf	*m;
1167 	int		error;
1168 
1169 	error = do_sys_getsockname(l, SCARG(uap, fdes), PRU_SOCKADDR, &m);
1170 	if (error != 0)
1171 		return error;
1172 
1173 	error = copyout_sockname(SCARG(uap, asa), SCARG(uap, alen),
1174 	    MSG_LENUSRSPACE, m);
1175 	if (m != NULL)
1176 		m_free(m);
1177 	return error;
1178 }
1179 
1180 /*
1181  * Get name of peer for connected socket.
1182  */
1183 /* ARGSUSED */
1184 int
1185 sys_getpeername(struct lwp *l, const struct sys_getpeername_args *uap, register_t *retval)
1186 {
1187 	/* {
1188 		syscallarg(int)			fdes;
1189 		syscallarg(struct sockaddr *)	asa;
1190 		syscallarg(unsigned int *)	alen;
1191 	} */
1192 	struct mbuf	*m;
1193 	int		error;
1194 
1195 	error = do_sys_getsockname(l, SCARG(uap, fdes), PRU_PEERADDR, &m);
1196 	if (error != 0)
1197 		return error;
1198 
1199 	error = copyout_sockname(SCARG(uap, asa), SCARG(uap, alen),
1200 	    MSG_LENUSRSPACE, m);
1201 	if (m != NULL)
1202 		m_free(m);
1203 	return error;
1204 }
1205 
1206 /*
1207  * XXX In a perfect world, we wouldn't pass around socket control
1208  * XXX arguments in mbufs, and this could go away.
1209  */
1210 int
1211 sockargs(struct mbuf **mp, const void *bf, size_t buflen, int type)
1212 {
1213 	struct sockaddr	*sa;
1214 	struct mbuf	*m;
1215 	int		error;
1216 
1217 	/*
1218 	 * We can't allow socket names > UCHAR_MAX in length, since that
1219 	 * will overflow sa_len.  Control data more than a page size in
1220 	 * length is just too much.
1221 	 */
1222 	if (buflen > (type == MT_SONAME ? UCHAR_MAX : PAGE_SIZE))
1223 		return (EINVAL);
1224 
1225 	/* Allocate an mbuf to hold the arguments. */
1226 	m = m_get(M_WAIT, type);
1227 	/* can't claim.  don't who to assign it to. */
1228 	if (buflen > MLEN) {
1229 		/*
1230 		 * Won't fit into a regular mbuf, so we allocate just
1231 		 * enough external storage to hold the argument.
1232 		 */
1233 		MEXTMALLOC(m, buflen, M_WAITOK);
1234 	}
1235 	m->m_len = buflen;
1236 	error = copyin(bf, mtod(m, void *), buflen);
1237 	if (error) {
1238 		(void) m_free(m);
1239 		return (error);
1240 	}
1241 	ktrkuser(mbuftypes[type], mtod(m, void *), buflen);
1242 	*mp = m;
1243 	if (type == MT_SONAME) {
1244 		sa = mtod(m, struct sockaddr *);
1245 #if BYTE_ORDER != BIG_ENDIAN
1246 		/*
1247 		 * 4.3BSD compat thing - need to stay, since bind(2),
1248 		 * connect(2), sendto(2) were not versioned for COMPAT_43.
1249 		 */
1250 		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1251 			sa->sa_family = sa->sa_len;
1252 #endif
1253 		sa->sa_len = buflen;
1254 	}
1255 	return (0);
1256 }
1257