xref: /netbsd-src/sys/kern/uipc_syscalls.c (revision f82d7874c259b2a6cc59b714f844919f32bf7b51)
1 /*	$NetBSD: uipc_syscalls.c,v 1.131 2008/04/28 20:24:05 martin Exp $	*/
2 
3 /*-
4  * Copyright (c) 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * Copyright (c) 1982, 1986, 1989, 1990, 1993
31  *	The Regents of the University of California.  All rights reserved.
32  *
33  * Redistribution and use in source and binary forms, with or without
34  * modification, are permitted provided that the following conditions
35  * are met:
36  * 1. Redistributions of source code must retain the above copyright
37  *    notice, this list of conditions and the following disclaimer.
38  * 2. Redistributions in binary form must reproduce the above copyright
39  *    notice, this list of conditions and the following disclaimer in the
40  *    documentation and/or other materials provided with the distribution.
41  * 3. Neither the name of the University nor the names of its contributors
42  *    may be used to endorse or promote products derived from this software
43  *    without specific prior written permission.
44  *
45  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
46  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
49  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55  * SUCH DAMAGE.
56  *
57  *	@(#)uipc_syscalls.c	8.6 (Berkeley) 2/14/95
58  */
59 
60 #include <sys/cdefs.h>
61 __KERNEL_RCSID(0, "$NetBSD: uipc_syscalls.c,v 1.131 2008/04/28 20:24:05 martin Exp $");
62 
63 #include "opt_pipe.h"
64 
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/filedesc.h>
68 #include <sys/proc.h>
69 #include <sys/file.h>
70 #include <sys/buf.h>
71 #include <sys/malloc.h>
72 #include <sys/mbuf.h>
73 #include <sys/protosw.h>
74 #include <sys/socket.h>
75 #include <sys/socketvar.h>
76 #include <sys/signalvar.h>
77 #include <sys/un.h>
78 #include <sys/ktrace.h>
79 #include <sys/event.h>
80 
81 #include <sys/mount.h>
82 #include <sys/syscallargs.h>
83 
84 #include <uvm/uvm_extern.h>
85 
86 /*
87  * System call interface to the socket abstraction.
88  */
89 extern const struct fileops socketops;
90 
91 int
92 sys___socket30(struct lwp *l, const struct sys___socket30_args *uap, register_t *retval)
93 {
94 	/* {
95 		syscallarg(int)	domain;
96 		syscallarg(int)	type;
97 		syscallarg(int)	protocol;
98 	} */
99 	int		fd, error;
100 
101 	error = fsocreate(SCARG(uap, domain), NULL, SCARG(uap, type),
102 			 SCARG(uap, protocol), l, &fd);
103 	if (error == 0)
104 		*retval = fd;
105 	return error;
106 }
107 
108 /* ARGSUSED */
109 int
110 sys_bind(struct lwp *l, const struct sys_bind_args *uap, register_t *retval)
111 {
112 	/* {
113 		syscallarg(int)				s;
114 		syscallarg(const struct sockaddr *)	name;
115 		syscallarg(unsigned int)		namelen;
116 	} */
117 	struct mbuf	*nam;
118 	int		error;
119 
120 	error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
121 	    MT_SONAME);
122 	if (error)
123 		return error;
124 
125 	return do_sys_bind(l, SCARG(uap, s), nam);
126 }
127 
128 int
129 do_sys_bind(struct lwp *l, int fd, struct mbuf *nam)
130 {
131 	struct socket	*so;
132 	int		error;
133 
134 	if ((error = fd_getsock(fd, &so)) != 0) {
135 		m_freem(nam);
136 		return (error);
137 	}
138 	MCLAIM(nam, so->so_mowner);
139 	error = sobind(so, nam, l);
140 	m_freem(nam);
141 	fd_putfile(fd);
142 	return error;
143 }
144 
145 /* ARGSUSED */
146 int
147 sys_listen(struct lwp *l, const struct sys_listen_args *uap, register_t *retval)
148 {
149 	/* {
150 		syscallarg(int)	s;
151 		syscallarg(int)	backlog;
152 	} */
153 	struct socket	*so;
154 	int		error;
155 
156 	if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
157 		return (error);
158 	error = solisten(so, SCARG(uap, backlog), l);
159 	fd_putfile(SCARG(uap, s));
160 	return error;
161 }
162 
163 int
164 do_sys_accept(struct lwp *l, int sock, struct mbuf **name, register_t *new_sock)
165 {
166 	file_t		*fp, *fp2;
167 	struct mbuf	*nam;
168 	int		error, fd;
169 	struct socket	*so, *so2;
170 
171 	if ((fp = fd_getfile(sock)) == NULL)
172 		return (EBADF);
173 	if (fp->f_type != DTYPE_SOCKET)
174 		return (ENOTSOCK);
175 	if ((error = fd_allocfile(&fp2, &fd)) != 0)
176 		return (error);
177 	nam = m_get(M_WAIT, MT_SONAME);
178 	*new_sock = fd;
179 	so = fp->f_data;
180 	solock(so);
181 	if (!(so->so_proto->pr_flags & PR_LISTEN)) {
182 		error = EOPNOTSUPP;
183 		goto bad;
184 	}
185 	if ((so->so_options & SO_ACCEPTCONN) == 0) {
186 		error = EINVAL;
187 		goto bad;
188 	}
189 	if (so->so_nbio && so->so_qlen == 0) {
190 		error = EWOULDBLOCK;
191 		goto bad;
192 	}
193 	while (so->so_qlen == 0 && so->so_error == 0) {
194 		if (so->so_state & SS_CANTRCVMORE) {
195 			so->so_error = ECONNABORTED;
196 			break;
197 		}
198 		error = sowait(so, 0);
199 		if (error) {
200 			goto bad;
201 		}
202 	}
203 	if (so->so_error) {
204 		error = so->so_error;
205 		so->so_error = 0;
206 		goto bad;
207 	}
208 	/* connection has been removed from the listen queue */
209 	KNOTE(&so->so_rcv.sb_sel.sel_klist, NOTE_SUBMIT);
210 	so2 = TAILQ_FIRST(&so->so_q);
211 	if (soqremque(so2, 1) == 0)
212 		panic("accept");
213 	fp2->f_type = DTYPE_SOCKET;
214 	fp2->f_flag = fp->f_flag;
215 	fp2->f_ops = &socketops;
216 	fp2->f_data = so2;
217 	error = soaccept(so2, nam);
218 	sounlock(so);
219 	if (error) {
220 		/* an error occurred, free the file descriptor and mbuf */
221 		m_freem(nam);
222 		mutex_enter(&fp2->f_lock);
223 		fp2->f_count++;
224 		mutex_exit(&fp2->f_lock);
225 		closef(fp2);
226 		fd_abort(curproc, NULL, fd);
227 	} else {
228 		fd_affix(curproc, fp2, fd);
229 		*name = nam;
230 	}
231 	fd_putfile(sock);
232 	return (error);
233  bad:
234  	sounlock(so);
235  	m_freem(nam);
236 	fd_putfile(sock);
237  	fd_abort(curproc, fp2, fd);
238  	return (error);
239 }
240 
241 int
242 sys_accept(struct lwp *l, const struct sys_accept_args *uap, register_t *retval)
243 {
244 	/* {
245 		syscallarg(int)			s;
246 		syscallarg(struct sockaddr *)	name;
247 		syscallarg(unsigned int *)	anamelen;
248 	} */
249 	int error, fd;
250 	struct mbuf *name;
251 
252 	error = do_sys_accept(l, SCARG(uap, s), &name, retval);
253 	if (error != 0)
254 		return error;
255 	error = copyout_sockname(SCARG(uap, name), SCARG(uap, anamelen),
256 	    MSG_LENUSRSPACE, name);
257 	if (name != NULL)
258 		m_free(name);
259 	if (error != 0) {
260 		fd = (int)*retval;
261 		if (fd_getfile(fd) != NULL)
262 			(void)fd_close(fd);
263 	}
264 	return error;
265 }
266 
267 /* ARGSUSED */
268 int
269 sys_connect(struct lwp *l, const struct sys_connect_args *uap, register_t *retval)
270 {
271 	/* {
272 		syscallarg(int)				s;
273 		syscallarg(const struct sockaddr *)	name;
274 		syscallarg(unsigned int)		namelen;
275 	} */
276 	int		error;
277 	struct mbuf	*nam;
278 
279 	error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
280 	    MT_SONAME);
281 	if (error)
282 		return error;
283 	return do_sys_connect(l,  SCARG(uap, s), nam);
284 }
285 
/*
 * Connect the socket behind descriptor fd to the address held in nam.
 *
 * The nam mbuf is always freed before returning.  Returns 0 on success
 * or an errno value: EALREADY if a connect is already in progress on
 * entry, EINPROGRESS if the socket is non-blocking (so_nbio) and the
 * connect has not completed yet.  ERESTART from sowait() is reported to
 * the caller as EINTR.
 */
int
do_sys_connect(struct lwp *l, int fd, struct mbuf *nam)
{
	struct socket	*so;
	int		error;
	int		interrupted = 0;	/* set when sowait() was interrupted */

	if ((error = fd_getsock(fd, &so)) != 0) {
		m_freem(nam);
		return (error);
	}
	solock(so);
	MCLAIM(nam, so->so_mowner);
	if (so->so_state & SS_ISCONNECTING) {
		/* Skips the "bad" label, so SS_ISCONNECTING stays set. */
		error = EALREADY;
		goto out;
	}

	error = soconnect(so, nam, l);
	if (error)
		goto bad;
	if (so->so_nbio && (so->so_state & SS_ISCONNECTING)) {
		/* Non-blocking: return without waiting, state left as is. */
		error = EINPROGRESS;
		goto out;
	}
	/* Sleep until the connect finishes or an error is posted. */
	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
		error = sowait(so, 0);
		if (error) {
			if (error == EINTR || error == ERESTART)
				interrupted = 1;
			break;
		}
	}
	if (error == 0) {
		/* Pick up and clear the socket's pending error, if any. */
		error = so->so_error;
		so->so_error = 0;
	}
 bad:
	/* SS_ISCONNECTING is left set when the wait was interrupted. */
	if (!interrupted)
		so->so_state &= ~SS_ISCONNECTING;
	if (error == ERESTART)
		error = EINTR;
 out:
	sounlock(so);
	fd_putfile(fd);
	m_freem(nam);
	return (error);
}
334 
335 int
336 sys_socketpair(struct lwp *l, const struct sys_socketpair_args *uap, register_t *retval)
337 {
338 	/* {
339 		syscallarg(int)		domain;
340 		syscallarg(int)		type;
341 		syscallarg(int)		protocol;
342 		syscallarg(int *)	rsv;
343 	} */
344 	file_t		*fp1, *fp2;
345 	struct socket	*so1, *so2;
346 	int		fd, error, sv[2];
347 	proc_t		*p;
348 
349 	p = curproc;
350 	error = socreate(SCARG(uap, domain), &so1, SCARG(uap, type),
351 	    SCARG(uap, protocol), l, NULL);
352 	if (error)
353 		return (error);
354 	error = socreate(SCARG(uap, domain), &so2, SCARG(uap, type),
355 	    SCARG(uap, protocol), l, so1);
356 	if (error)
357 		goto free1;
358 	if ((error = fd_allocfile(&fp1, &fd)) != 0)
359 		goto free2;
360 	sv[0] = fd;
361 	fp1->f_flag = FREAD|FWRITE;
362 	fp1->f_type = DTYPE_SOCKET;
363 	fp1->f_ops = &socketops;
364 	fp1->f_data = so1;
365 	if ((error = fd_allocfile(&fp2, &fd)) != 0)
366 		goto free3;
367 	fp2->f_flag = FREAD|FWRITE;
368 	fp2->f_type = DTYPE_SOCKET;
369 	fp2->f_ops = &socketops;
370 	fp2->f_data = so2;
371 	sv[1] = fd;
372 	solock(so1);
373 	error = soconnect2(so1, so2);
374 	if (error == 0 && SCARG(uap, type) == SOCK_DGRAM) {
375 		/*
376 		 * Datagram socket connection is asymmetric.
377 		 */
378 		error = soconnect2(so2, so1);
379 	}
380 	sounlock(so1);
381 	if (error == 0)
382 		error = copyout(sv, SCARG(uap, rsv), 2 * sizeof(int));
383 	if (error == 0) {
384 		fd_affix(p, fp2, sv[1]);
385 		fd_affix(p, fp1, sv[0]);
386 		return (0);
387 	}
388 	fd_abort(p, fp2, sv[1]);
389  free3:
390 	fd_abort(p, fp1, sv[0]);
391  free2:
392 	(void)soclose(so2);
393  free1:
394 	(void)soclose(so1);
395 	return (error);
396 }
397 
398 int
399 sys_sendto(struct lwp *l, const struct sys_sendto_args *uap, register_t *retval)
400 {
401 	/* {
402 		syscallarg(int)				s;
403 		syscallarg(const void *)		buf;
404 		syscallarg(size_t)			len;
405 		syscallarg(int)				flags;
406 		syscallarg(const struct sockaddr *)	to;
407 		syscallarg(unsigned int)		tolen;
408 	} */
409 	struct msghdr	msg;
410 	struct iovec	aiov;
411 
412 	msg.msg_name = __UNCONST(SCARG(uap, to)); /* XXXUNCONST kills const */
413 	msg.msg_namelen = SCARG(uap, tolen);
414 	msg.msg_iov = &aiov;
415 	msg.msg_iovlen = 1;
416 	msg.msg_control = NULL;
417 	msg.msg_flags = 0;
418 	aiov.iov_base = __UNCONST(SCARG(uap, buf)); /* XXXUNCONST kills const */
419 	aiov.iov_len = SCARG(uap, len);
420 	return do_sys_sendmsg(l, SCARG(uap, s), &msg, SCARG(uap, flags), retval);
421 }
422 
423 int
424 sys_sendmsg(struct lwp *l, const struct sys_sendmsg_args *uap, register_t *retval)
425 {
426 	/* {
427 		syscallarg(int)				s;
428 		syscallarg(const struct msghdr *)	msg;
429 		syscallarg(int)				flags;
430 	} */
431 	struct msghdr	msg;
432 	int		error;
433 
434 	error = copyin(SCARG(uap, msg), &msg, sizeof(msg));
435 	if (error)
436 		return (error);
437 
438 	msg.msg_flags = MSG_IOVUSRSPACE;
439 	return do_sys_sendmsg(l, SCARG(uap, s), &msg, SCARG(uap, flags), retval);
440 }
441 
/*
 * Common back end for the send system calls in this file.
 *
 * Builds a uio from the iovec array described by *mp, copies in the
 * destination name and control data when they are not already supplied
 * as mbufs, and passes everything to the socket's so_send method.
 *
 * mp->msg_flags selects how the header fields are interpreted:
 *   MSG_NAMEMBUF	msg_name is already an mbuf
 *   MSG_CONTROLMBUF	msg_control is already an mbuf
 *   MSG_IOVUSRSPACE	msg_iov points to user space and is copied in
 *			(mp->msg_iov is then rewritten to the kernel copy)
 *
 * On success returns 0 and stores the number of bytes sent in *retsize;
 * otherwise returns an errno value.  Any name mbuf, and any control mbuf
 * not handed to so_send, are freed here.
 */
int
do_sys_sendmsg(struct lwp *l, int s, struct msghdr *mp, int flags,
		register_t *retsize)
{
	struct uio	auio;
	int		i, len, error, iovlen;
	struct mbuf	*to, *control;
	struct socket	*so;
	struct iovec	*tiov;
	struct iovec	aiov[UIO_SMALLIOV], *iov = aiov;
	struct iovec	*ktriov = NULL;

	ktrkuser("msghdr", mp, sizeof *mp);

	/* If the caller passed us stuff in mbufs, we must free them */
	if (mp->msg_flags & MSG_NAMEMBUF)
		to = mp->msg_name;
	else
		to = NULL;

	if (mp->msg_flags & MSG_CONTROLMBUF)
		control = mp->msg_control;
	else
		control = NULL;

	if (mp->msg_flags & MSG_IOVUSRSPACE) {
		/* Use the on-stack array unless the vector is too large for it. */
		if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) {
			if ((unsigned int)mp->msg_iovlen > IOV_MAX) {
				error = EMSGSIZE;
				goto bad;
			}
			iov = malloc(sizeof(struct iovec) * mp->msg_iovlen,
			    M_IOV, M_WAITOK);
		}
		if (mp->msg_iovlen != 0) {
			error = copyin(mp->msg_iov, iov,
			    (size_t)(mp->msg_iovlen * sizeof(struct iovec)));
			if (error)
				goto bad;
		}
		mp->msg_iov = iov;
	}

	auio.uio_iov = mp->msg_iov;
	auio.uio_iovcnt = mp->msg_iovlen;
	auio.uio_rw = UIO_WRITE;
	auio.uio_offset = 0;			/* XXX */
	auio.uio_resid = 0;
	KASSERT(l == curlwp);
	auio.uio_vmspace = l->l_proc->p_vmspace;

	/* Sum the segment lengths into uio_resid, rejecting oversized totals. */
	for (i = 0, tiov = mp->msg_iov; i < mp->msg_iovlen; i++, tiov++) {
#if 0
		/* cannot happen; iov_len is unsigned */
		if (tiov->iov_len < 0) {
			error = EINVAL;
			goto bad;
		}
#endif
		/*
		 * Writes return ssize_t because -1 is returned on error.
		 * Therefore, we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		auio.uio_resid += tiov->iov_len;
		if (tiov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto bad;
		}
	}

	/* Copy in the destination address unless it came as an mbuf. */
	if (mp->msg_name && to == NULL) {
		error = sockargs(&to, mp->msg_name, mp->msg_namelen,
		    MT_SONAME);
		if (error)
			goto bad;
	}

	if (mp->msg_control) {
		/* Control data must hold at least one aligned cmsghdr. */
		if (mp->msg_controllen < CMSG_ALIGN(sizeof(struct cmsghdr))) {
			error = EINVAL;
			goto bad;
		}
		if (control == NULL) {
			error = sockargs(&control, mp->msg_control,
			    mp->msg_controllen, MT_CONTROL);
			if (error)
				goto bad;
		}
	}

	/* Snapshot the iovec array for ktrace before so_send consumes the uio. */
	if (ktrpoint(KTR_GENIO)) {
		iovlen = auio.uio_iovcnt * sizeof(struct iovec);
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy(ktriov, auio.uio_iov, iovlen);
	}

	if ((error = fd_getsock(s, &so)) != 0)
		goto bad;

	if (mp->msg_name)
		MCLAIM(to, so->so_mowner);
	if (mp->msg_control)
		MCLAIM(control, so->so_mowner);

	len = auio.uio_resid;
	error = (*so->so_send)(so, to, &auio, NULL, control, flags, l);
	/* Protocol is responsible for freeing 'control' */
	control = NULL;

	fd_putfile(s);

	if (error) {
		/* A partial transfer that was interrupted counts as success. */
		if (auio.uio_resid != len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* Deliver SIGPIPE unless the caller asked for MSG_NOSIGNAL. */
		if (error == EPIPE && (flags & MSG_NOSIGNAL) == 0) {
			mutex_enter(proc_lock);
			psignal(l->l_proc, SIGPIPE);
			mutex_exit(proc_lock);
		}
	}
	if (error == 0)
		*retsize = len - auio.uio_resid;

bad:
	/*
	 * NOTE(review): *retsize is read here even on paths that never
	 * assigned it in this function (e.g. fd_getsock() failure).
	 */
	if (ktriov != NULL) {
		ktrgeniov(s, UIO_WRITE, ktriov, *retsize, error);
		free(ktriov, M_TEMP);
	}

	if (iov != aiov)
		free(iov, M_IOV);
	if (to)
		m_freem(to);
	if (control)
		m_freem(control);

	return (error);
}
582 
583 int
584 sys_recvfrom(struct lwp *l, const struct sys_recvfrom_args *uap, register_t *retval)
585 {
586 	/* {
587 		syscallarg(int)			s;
588 		syscallarg(void *)		buf;
589 		syscallarg(size_t)		len;
590 		syscallarg(int)			flags;
591 		syscallarg(struct sockaddr *)	from;
592 		syscallarg(unsigned int *)	fromlenaddr;
593 	} */
594 	struct msghdr	msg;
595 	struct iovec	aiov;
596 	int		error;
597 	struct mbuf	*from;
598 
599 	msg.msg_name = NULL;
600 	msg.msg_iov = &aiov;
601 	msg.msg_iovlen = 1;
602 	aiov.iov_base = SCARG(uap, buf);
603 	aiov.iov_len = SCARG(uap, len);
604 	msg.msg_control = NULL;
605 	msg.msg_flags = SCARG(uap, flags) & MSG_USERFLAGS;
606 
607 	error = do_sys_recvmsg(l, SCARG(uap, s), &msg, &from, NULL, retval);
608 	if (error != 0)
609 		return error;
610 
611 	error = copyout_sockname(SCARG(uap, from), SCARG(uap, fromlenaddr),
612 	    MSG_LENUSRSPACE, from);
613 	if (from != NULL)
614 		m_free(from);
615 	return error;
616 }
617 
618 int
619 sys_recvmsg(struct lwp *l, const struct sys_recvmsg_args *uap, register_t *retval)
620 {
621 	/* {
622 		syscallarg(int)			s;
623 		syscallarg(struct msghdr *)	msg;
624 		syscallarg(int)			flags;
625 	} */
626 	struct msghdr	msg;
627 	int		error;
628 	struct mbuf	*from, *control;
629 
630 	error = copyin(SCARG(uap, msg), &msg, sizeof(msg));
631 	if (error)
632 		return (error);
633 
634 	msg.msg_flags = (SCARG(uap, flags) & MSG_USERFLAGS) | MSG_IOVUSRSPACE;
635 
636 	error = do_sys_recvmsg(l, SCARG(uap, s), &msg, &from,
637 	    msg.msg_control != NULL ? &control : NULL, retval);
638 	if (error != 0)
639 		return error;
640 
641 	if (msg.msg_control != NULL)
642 		error = copyout_msg_control(l, &msg, control);
643 
644 	if (error == 0)
645 		error = copyout_sockname(msg.msg_name, &msg.msg_namelen, 0,
646 			from);
647 	if (from != NULL)
648 		m_free(from);
649 	if (error == 0) {
650 		ktrkuser("msghdr", &msg, sizeof msg);
651 		error = copyout(&msg, SCARG(uap, msg), sizeof(msg));
652 	}
653 
654 	return (error);
655 }
656 
657 /*
658  * Adjust for a truncated SCM_RIGHTS control message.
659  *  This means closing any file descriptors that aren't present
660  *  in the returned buffer.
661  *  m is the mbuf holding the (already externalized) SCM_RIGHTS message.
662  */
663 static void
664 free_rights(struct mbuf *m)
665 {
666 	int nfd;
667 	int i;
668 	int *fdv;
669 
670 	nfd = m->m_len < CMSG_SPACE(sizeof(int)) ? 0
671 	    : (m->m_len - CMSG_SPACE(sizeof(int))) / sizeof(int) + 1;
672 	fdv = (int *) CMSG_DATA(mtod(m,struct cmsghdr *));
673 	for (i = 0; i < nfd; i++) {
674 		if (fd_getfile(fdv[i]) != NULL)
675 			(void)fd_close(fdv[i]);
676 	}
677 }
678 
679 void
680 free_control_mbuf(struct lwp *l, struct mbuf *control, struct mbuf *uncopied)
681 {
682 	struct mbuf *next;
683 	struct cmsghdr *cmsg;
684 	bool do_free_rights = false;
685 
686 	while (control != NULL) {
687 		cmsg = mtod(control, struct cmsghdr *);
688 		if (control == uncopied)
689 			do_free_rights = true;
690 		if (do_free_rights && cmsg->cmsg_level == SOL_SOCKET
691 		    && cmsg->cmsg_type == SCM_RIGHTS)
692 			free_rights(control);
693 		next = control->m_next;
694 		m_free(control);
695 		control = next;
696 	}
697 }
698 
/*
 * Copy socket control/CMSG data to user buffer, frees the mbuf
 *
 * Walks the control mbuf chain, copying each mbuf to the user buffer
 * mp->msg_control until the chain or the space given by
 * mp->msg_controllen runs out.  MSG_CTRUNC is set in mp->msg_flags when
 * an mbuf does not fit completely.  On return mp->msg_controllen holds
 * the number of bytes accounted for in the user buffer.  The whole chain
 * is always freed.  Returns 0 or the error from copyout().
 */
int
copyout_msg_control(struct lwp *l, struct msghdr *mp, struct mbuf *control)
{
	int i, len, error = 0;
	struct cmsghdr *cmsg;
	struct mbuf *m;
	char *q;

	len = mp->msg_controllen;
	if (len <= 0 || control == 0) {
		/* No room or nothing to copy: discard everything. */
		mp->msg_controllen = 0;
		free_control_mbuf(l, control, control);
		return 0;
	}

	/* q tracks the current write position in the user buffer. */
	q = (char *)mp->msg_control;

	for (m = control; m != NULL; ) {
		cmsg = mtod(m, struct cmsghdr *);
		i = m->m_len;
		if (len < i) {
			mp->msg_flags |= MSG_CTRUNC;
			if (cmsg->cmsg_level == SOL_SOCKET
			    && cmsg->cmsg_type == SCM_RIGHTS)
				/* Do not truncate me ... */
				break;
			/* Other message types are copied partially. */
			i = len;
		}
		error = copyout(mtod(m, void *), q, i);
		ktrkuser("msgcontrol", mtod(m, void *), i);
		if (error != 0) {
			/* We must free all the SCM_RIGHTS */
			m = control;
			break;
		}
		m = m->m_next;
		/* Pad to alignment only when another message follows. */
		if (m)
			i = ALIGN(i);
		q += i;
		len -= i;
		if (len <= 0)
			break;
	}

	/* m is the first mbuf that was not copied out (NULL if all were). */
	free_control_mbuf(l, control, m);

	mp->msg_controllen = q - (char *)mp->msg_control;
	return error;
}
749 
750 int
751 do_sys_recvmsg(struct lwp *l, int s, struct msghdr *mp, struct mbuf **from,
752     struct mbuf **control, register_t *retsize)
753 {
754 	struct uio	auio;
755 	struct iovec	aiov[UIO_SMALLIOV], *iov = aiov;
756 	struct iovec	*tiov;
757 	int		i, len, error, iovlen;
758 	struct socket	*so;
759 	struct iovec	*ktriov;
760 
761 	ktrkuser("msghdr", mp, sizeof *mp);
762 
763 	*from = NULL;
764 	if (control != NULL)
765 		*control = NULL;
766 
767 	if ((error = fd_getsock(s, &so)) != 0)
768 		return (error);
769 
770 	if (mp->msg_flags & MSG_IOVUSRSPACE) {
771 		if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) {
772 			if ((unsigned int)mp->msg_iovlen > IOV_MAX) {
773 				error = EMSGSIZE;
774 				goto out;
775 			}
776 			iov = malloc(sizeof(struct iovec) * mp->msg_iovlen,
777 			    M_IOV, M_WAITOK);
778 		}
779 		if (mp->msg_iovlen != 0) {
780 			error = copyin(mp->msg_iov, iov,
781 			    (size_t)(mp->msg_iovlen * sizeof(struct iovec)));
782 			if (error)
783 				goto out;
784 		}
785 		auio.uio_iov = iov;
786 	} else
787 		auio.uio_iov = mp->msg_iov;
788 	auio.uio_iovcnt = mp->msg_iovlen;
789 	auio.uio_rw = UIO_READ;
790 	auio.uio_offset = 0;			/* XXX */
791 	auio.uio_resid = 0;
792 	KASSERT(l == curlwp);
793 	auio.uio_vmspace = l->l_proc->p_vmspace;
794 
795 	tiov = auio.uio_iov;
796 	for (i = 0; i < mp->msg_iovlen; i++, tiov++) {
797 #if 0
798 		/* cannot happen iov_len is unsigned */
799 		if (tiov->iov_len < 0) {
800 			error = EINVAL;
801 			goto out;
802 		}
803 #endif
804 		/*
805 		 * Reads return ssize_t because -1 is returned on error.
806 		 * Therefore we must restrict the length to SSIZE_MAX to
807 		 * avoid garbage return values.
808 		 */
809 		auio.uio_resid += tiov->iov_len;
810 		if (tiov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
811 			error = EINVAL;
812 			goto out;
813 		}
814 	}
815 
816 	ktriov = NULL;
817 	if (ktrpoint(KTR_GENIO)) {
818 		iovlen = auio.uio_iovcnt * sizeof(struct iovec);
819 		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
820 		memcpy(ktriov, auio.uio_iov, iovlen);
821 	}
822 
823 	len = auio.uio_resid;
824 	mp->msg_flags &= MSG_USERFLAGS;
825 	error = (*so->so_receive)(so, from, &auio, NULL, control,
826 	    &mp->msg_flags);
827 	len -= auio.uio_resid;
828 	*retsize = len;
829 	if (error != 0 && len != 0
830 	    && (error == ERESTART || error == EINTR || error == EWOULDBLOCK))
831 		/* Some data transferred */
832 		error = 0;
833 
834 	if (ktriov != NULL) {
835 		ktrgeniov(s, UIO_READ, ktriov, len, error);
836 		free(ktriov, M_TEMP);
837 	}
838 
839 	if (error != 0) {
840 		m_freem(*from);
841 		*from = NULL;
842 		if (control != NULL) {
843 			free_control_mbuf(l, *control, *control);
844 			*control = NULL;
845 		}
846 	}
847  out:
848 	if (iov != aiov)
849 		free(iov, M_TEMP);
850 	fd_putfile(s);
851 	return (error);
852 }
853 
854 
855 /* ARGSUSED */
856 int
857 sys_shutdown(struct lwp *l, const struct sys_shutdown_args *uap, register_t *retval)
858 {
859 	/* {
860 		syscallarg(int)	s;
861 		syscallarg(int)	how;
862 	} */
863 	struct socket	*so;
864 	int		error;
865 
866 	if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
867 		return (error);
868 	solock(so);
869 	error = soshutdown(so, SCARG(uap, how));
870 	sounlock(so);
871 	fd_putfile(SCARG(uap, s));
872 	return (error);
873 }
874 
875 /* ARGSUSED */
876 int
877 sys_setsockopt(struct lwp *l, const struct sys_setsockopt_args *uap, register_t *retval)
878 {
879 	/* {
880 		syscallarg(int)			s;
881 		syscallarg(int)			level;
882 		syscallarg(int)			name;
883 		syscallarg(const void *)	val;
884 		syscallarg(unsigned int)	valsize;
885 	} */
886 	struct proc	*p;
887 	struct mbuf	*m;
888 	struct socket	*so;
889 	int		error;
890 	unsigned int	len;
891 
892 	p = l->l_proc;
893 	m = NULL;
894 	if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
895 		return (error);
896 	len = SCARG(uap, valsize);
897 	if (len > MCLBYTES) {
898 		error = EINVAL;
899 		goto out;
900 	}
901 	if (SCARG(uap, val)) {
902 		m = getsombuf(so, MT_SOOPTS);
903 		if (len > MLEN)
904 			m_clget(m, M_WAIT);
905 		error = copyin(SCARG(uap, val), mtod(m, void *), len);
906 		if (error) {
907 			(void) m_free(m);
908 			goto out;
909 		}
910 		m->m_len = SCARG(uap, valsize);
911 	}
912 	error = sosetopt(so, SCARG(uap, level), SCARG(uap, name), m);
913  out:
914  	fd_putfile(SCARG(uap, s));
915 	return (error);
916 }
917 
918 /* ARGSUSED */
919 int
920 sys_getsockopt(struct lwp *l, const struct sys_getsockopt_args *uap, register_t *retval)
921 {
922 	/* {
923 		syscallarg(int)			s;
924 		syscallarg(int)			level;
925 		syscallarg(int)			name;
926 		syscallarg(void *)		val;
927 		syscallarg(unsigned int *)	avalsize;
928 	} */
929 	struct socket	*so;
930 	struct mbuf	*m;
931 	unsigned int	op, i, valsize;
932 	int		error;
933 	char *val = SCARG(uap, val);
934 
935 	m = NULL;
936 	if ((error = fd_getsock(SCARG(uap, s), &so)) != 0)
937 		return (error);
938 	if (val != NULL) {
939 		error = copyin(SCARG(uap, avalsize),
940 			       &valsize, sizeof(valsize));
941 		if (error)
942 			goto out;
943 	} else
944 		valsize = 0;
945 	error = sogetopt(so, SCARG(uap, level), SCARG(uap, name), &m);
946 	if (error == 0 && val != NULL && valsize && m != NULL) {
947 		op = 0;
948 		while (m && !error && op < valsize) {
949 			i = min(m->m_len, (valsize - op));
950 			error = copyout(mtod(m, void *), val, i);
951 			op += i;
952 			val += i;
953 			m = m_free(m);
954 		}
955 		valsize = op;
956 		if (error == 0)
957 			error = copyout(&valsize,
958 					SCARG(uap, avalsize), sizeof(valsize));
959 	}
960 	if (m != NULL)
961 		(void) m_freem(m);
962  out:
963  	fd_putfile(SCARG(uap, s));
964 	return (error);
965 }
966 
#ifdef PIPE_SOCKETPAIR
/* ARGSUSED */
/*
 * System call entry point, compiled only with PIPE_SOCKETPAIR: build a
 * pipe out of two connected AF_LOCAL stream sockets.  retval[0] receives
 * the read-only descriptor and retval[1] the write-only descriptor.  On
 * failure everything allocated so far is torn down in reverse order.
 */
int
sys_pipe(struct lwp *l, const void *v, register_t *retval)
{
	file_t		*rfp, *wfp;
	struct socket	*rsock, *wsock;
	proc_t		*p = curproc;
	int		newfd, rv;

	rv = socreate(AF_LOCAL, &rsock, SOCK_STREAM, 0, l, NULL);
	if (rv != 0)
		return (rv);
	rv = socreate(AF_LOCAL, &wsock, SOCK_STREAM, 0, l, rsock);
	if (rv != 0)
		goto close_read;

	/* remember this socket pair implements a pipe */
	wsock->so_state |= SS_ISAPIPE;
	rsock->so_state |= SS_ISAPIPE;

	/* Read end. */
	rv = fd_allocfile(&rfp, &newfd);
	if (rv != 0)
		goto close_write;
	retval[0] = newfd;
	rfp->f_flag = FREAD;
	rfp->f_type = DTYPE_SOCKET;
	rfp->f_ops = &socketops;
	rfp->f_data = rsock;

	/* Write end. */
	rv = fd_allocfile(&wfp, &newfd);
	if (rv != 0)
		goto abort_read;
	wfp->f_flag = FWRITE;
	wfp->f_type = DTYPE_SOCKET;
	wfp->f_ops = &socketops;
	wfp->f_data = wsock;
	retval[1] = newfd;

	solock(wsock);
	rv = unp_connect2(wsock, rsock, PRU_CONNECT2);
	sounlock(wsock);
	if (rv != 0) {
		fd_abort(p, wfp, (int)retval[1]);
		goto abort_read;
	}

	/* Everything worked: make both descriptors visible. */
	fd_affix(p, wfp, (int)retval[1]);
	fd_affix(p, rfp, (int)retval[0]);
	return (0);

 abort_read:
	fd_abort(p, rfp, (int)retval[0]);
 close_write:
	(void)soclose(wsock);
 close_read:
	(void)soclose(rsock);
	return (rv);
}
#endif /* PIPE_SOCKETPAIR */
1018 
1019 /*
1020  * Get socket name.
1021  */
1022 /* ARGSUSED */
1023 int
1024 do_sys_getsockname(struct lwp *l, int fd, int which, struct mbuf **nam)
1025 {
1026 	struct socket	*so;
1027 	struct mbuf	*m;
1028 	int		error;
1029 
1030 	if ((error = fd_getsock(fd, &so)) != 0)
1031 		return error;
1032 
1033 	m = m_getclr(M_WAIT, MT_SONAME);
1034 	MCLAIM(m, so->so_mowner);
1035 
1036 	solock(so);
1037 	if (which == PRU_PEERADDR
1038 	    && (so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
1039 		error = ENOTCONN;
1040 	} else {
1041 		*nam = m;
1042 		error = (*so->so_proto->pr_usrreq)(so, which, NULL, m, NULL,
1043 		    NULL);
1044 	}
1045  	sounlock(so);
1046 	if (error != 0)
1047 		m_free(m);
1048  	fd_putfile(fd);
1049 	return error;
1050 }
1051 
1052 int
1053 copyout_sockname(struct sockaddr *asa, unsigned int *alen, int flags,
1054     struct mbuf *addr)
1055 {
1056 	int len;
1057 	int error;
1058 
1059 	if (asa == NULL)
1060 		/* Assume application not interested */
1061 		return 0;
1062 
1063 	if (flags & MSG_LENUSRSPACE) {
1064 		error = copyin(alen, &len, sizeof(len));
1065 		if (error)
1066 			return error;
1067 	} else
1068 		len = *alen;
1069 	if (len < 0)
1070 		return EINVAL;
1071 
1072 	if (addr == NULL) {
1073 		len = 0;
1074 		error = 0;
1075 	} else {
1076 		if (len > addr->m_len)
1077 			len = addr->m_len;
1078 		/* Maybe this ought to copy a chain ? */
1079 		ktrkuser("sockname", mtod(addr, void *), len);
1080 		error = copyout(mtod(addr, void *), asa, len);
1081 	}
1082 
1083 	if (error == 0) {
1084 		if (flags & MSG_LENUSRSPACE)
1085 			error = copyout(&len, alen, sizeof(len));
1086 		else
1087 			*alen = len;
1088 	}
1089 
1090 	return error;
1091 }
1092 
1093 /*
1094  * Get socket name.
1095  */
1096 /* ARGSUSED */
1097 int
1098 sys_getsockname(struct lwp *l, const struct sys_getsockname_args *uap, register_t *retval)
1099 {
1100 	/* {
1101 		syscallarg(int)			fdes;
1102 		syscallarg(struct sockaddr *)	asa;
1103 		syscallarg(unsigned int *)	alen;
1104 	} */
1105 	struct mbuf	*m;
1106 	int		error;
1107 
1108 	error = do_sys_getsockname(l, SCARG(uap, fdes), PRU_SOCKADDR, &m);
1109 	if (error != 0)
1110 		return error;
1111 
1112 	error = copyout_sockname(SCARG(uap, asa), SCARG(uap, alen),
1113 	    MSG_LENUSRSPACE, m);
1114 	if (m != NULL)
1115 		m_free(m);
1116 	return error;
1117 }
1118 
1119 /*
1120  * Get name of peer for connected socket.
1121  */
1122 /* ARGSUSED */
1123 int
1124 sys_getpeername(struct lwp *l, const struct sys_getpeername_args *uap, register_t *retval)
1125 {
1126 	/* {
1127 		syscallarg(int)			fdes;
1128 		syscallarg(struct sockaddr *)	asa;
1129 		syscallarg(unsigned int *)	alen;
1130 	} */
1131 	struct mbuf	*m;
1132 	int		error;
1133 
1134 	error = do_sys_getsockname(l, SCARG(uap, fdes), PRU_PEERADDR, &m);
1135 	if (error != 0)
1136 		return error;
1137 
1138 	error = copyout_sockname(SCARG(uap, asa), SCARG(uap, alen),
1139 	    MSG_LENUSRSPACE, m);
1140 	if (m != NULL)
1141 		m_free(m);
1142 	return error;
1143 }
1144 
1145 /*
1146  * XXX In a perfect world, we wouldn't pass around socket control
1147  * XXX arguments in mbufs, and this could go away.
1148  */
1149 int
1150 sockargs(struct mbuf **mp, const void *bf, size_t buflen, int type)
1151 {
1152 	struct sockaddr	*sa;
1153 	struct mbuf	*m;
1154 	int		error;
1155 
1156 	/*
1157 	 * We can't allow socket names > UCHAR_MAX in length, since that
1158 	 * will overflow sa_len.  Control data more than a page size in
1159 	 * length is just too much.
1160 	 */
1161 	if (buflen > (type == MT_SONAME ? UCHAR_MAX : PAGE_SIZE))
1162 		return (EINVAL);
1163 
1164 	/* Allocate an mbuf to hold the arguments. */
1165 	m = m_get(M_WAIT, type);
1166 	/* can't claim.  don't who to assign it to. */
1167 	if (buflen > MLEN) {
1168 		/*
1169 		 * Won't fit into a regular mbuf, so we allocate just
1170 		 * enough external storage to hold the argument.
1171 		 */
1172 		MEXTMALLOC(m, buflen, M_WAITOK);
1173 	}
1174 	m->m_len = buflen;
1175 	error = copyin(bf, mtod(m, void *), buflen);
1176 	if (error) {
1177 		(void) m_free(m);
1178 		return (error);
1179 	}
1180 	ktrkuser("sockargs", mtod(m, void *), buflen);
1181 	*mp = m;
1182 	if (type == MT_SONAME) {
1183 		sa = mtod(m, struct sockaddr *);
1184 #if BYTE_ORDER != BIG_ENDIAN
1185 		/*
1186 		 * 4.3BSD compat thing - need to stay, since bind(2),
1187 		 * connect(2), sendto(2) were not versioned for COMPAT_43.
1188 		 */
1189 		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1190 			sa->sa_family = sa->sa_len;
1191 #endif
1192 		sa->sa_len = buflen;
1193 	}
1194 	return (0);
1195 }
1196 
1197 int
1198 getsock(int fdes, struct file **fpp)
1199 {
1200 	file_t		*fp;
1201 
1202 	if ((fp = fd_getfile(fdes)) == NULL)
1203 		return (EBADF);
1204 
1205 	if (fp->f_type != DTYPE_SOCKET) {
1206 		fd_putfile(fdes);
1207 		return (ENOTSOCK);
1208 	}
1209 	*fpp = fp;
1210 	return (0);
1211 }
1212