xref: /netbsd-src/sys/kern/uipc_syscalls.c (revision bf1e9b32e27832f0c493206710fb8b58a980838a)
1 /*	$NetBSD: uipc_syscalls.c,v 1.92 2005/05/30 11:21:11 martin Exp $	*/
2 
3 /*
4  * Copyright (c) 1982, 1986, 1989, 1990, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  *	@(#)uipc_syscalls.c	8.6 (Berkeley) 2/14/95
32  */
33 
34 #include <sys/cdefs.h>
35 __KERNEL_RCSID(0, "$NetBSD: uipc_syscalls.c,v 1.92 2005/05/30 11:21:11 martin Exp $");
36 
37 #include "opt_ktrace.h"
38 #include "opt_pipe.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/filedesc.h>
43 #include <sys/proc.h>
44 #include <sys/file.h>
45 #include <sys/buf.h>
46 #include <sys/malloc.h>
47 #include <sys/mbuf.h>
48 #include <sys/protosw.h>
49 #include <sys/socket.h>
50 #include <sys/socketvar.h>
51 #include <sys/signalvar.h>
52 #include <sys/un.h>
53 #ifdef KTRACE
54 #include <sys/ktrace.h>
55 #endif
56 #include <sys/event.h>
57 
58 #include <sys/mount.h>
59 #include <sys/sa.h>
60 #include <sys/syscallargs.h>
61 
62 #include <uvm/uvm_extern.h>
63 
64 static void adjust_rights(struct mbuf *m, int len, struct proc *p);
65 
66 /*
67  * System call interface to the socket abstraction.
68  */
69 extern const struct fileops socketops;
70 
71 int
72 sys_socket(struct lwp *l, void *v, register_t *retval)
73 {
74 	struct sys_socket_args /* {
75 		syscallarg(int)	domain;
76 		syscallarg(int)	type;
77 		syscallarg(int)	protocol;
78 	} */ *uap = v;
79 
80 	struct proc	*p;
81 	struct filedesc	*fdp;
82 	struct socket	*so;
83 	struct file	*fp;
84 	int		fd, error;
85 
86 	p = l->l_proc;
87 	fdp = p->p_fd;
88 	/* falloc() will use the desciptor for us */
89 	if ((error = falloc(p, &fp, &fd)) != 0)
90 		return (error);
91 	fp->f_flag = FREAD|FWRITE;
92 	fp->f_type = DTYPE_SOCKET;
93 	fp->f_ops = &socketops;
94 	error = socreate(SCARG(uap, domain), &so, SCARG(uap, type),
95 			 SCARG(uap, protocol), p);
96 	if (error) {
97 		FILE_UNUSE(fp, p);
98 		fdremove(fdp, fd);
99 		ffree(fp);
100 	} else {
101 		fp->f_data = (caddr_t)so;
102 		FILE_SET_MATURE(fp);
103 		FILE_UNUSE(fp, p);
104 		*retval = fd;
105 	}
106 	return (error);
107 }
108 
109 /* ARGSUSED */
110 int
111 sys_bind(struct lwp *l, void *v, register_t *retval)
112 {
113 	struct sys_bind_args /* {
114 		syscallarg(int)				s;
115 		syscallarg(const struct sockaddr *)	name;
116 		syscallarg(unsigned int)		namelen;
117 	} */ *uap = v;
118 	struct proc	*p;
119 	struct file	*fp;
120 	struct mbuf	*nam;
121 	int		error;
122 
123 	p = l->l_proc;
124 	/* getsock() will use the descriptor for us */
125 	if ((error = getsock(p->p_fd, SCARG(uap, s), &fp)) != 0)
126 		return (error);
127 	error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
128 	    MT_SONAME);
129 	if (error) {
130 		FILE_UNUSE(fp, p);
131 		return (error);
132 	}
133 	MCLAIM(nam, ((struct socket *)fp->f_data)->so_mowner);
134 	error = sobind((struct socket *)fp->f_data, nam, p);
135 	m_freem(nam);
136 	FILE_UNUSE(fp, p);
137 	return (error);
138 }
139 
140 /* ARGSUSED */
141 int
142 sys_listen(struct lwp *l, void *v, register_t *retval)
143 {
144 	struct sys_listen_args /* {
145 		syscallarg(int)	s;
146 		syscallarg(int)	backlog;
147 	} */ *uap = v;
148 	struct proc	*p;
149 	struct file	*fp;
150 	int		error;
151 
152 	p = l->l_proc;
153 	/* getsock() will use the descriptor for us */
154 	if ((error = getsock(p->p_fd, SCARG(uap, s), &fp)) != 0)
155 		return (error);
156 	error = solisten((struct socket *)fp->f_data, SCARG(uap, backlog));
157 	FILE_UNUSE(fp, p);
158 	return (error);
159 }
160 
161 int
162 sys_accept(struct lwp *l, void *v, register_t *retval)
163 {
164 	struct sys_accept_args /* {
165 		syscallarg(int)			s;
166 		syscallarg(struct sockaddr *)	name;
167 		syscallarg(unsigned int *)	anamelen;
168 	} */ *uap = v;
169 	struct proc	*p;
170 	struct filedesc	*fdp;
171 	struct file	*fp;
172 	struct mbuf	*nam;
173 	unsigned int	namelen;
174 	int		error, s, fd;
175 	struct socket	*so;
176 	int		fflag;
177 
178 	p = l->l_proc;
179 	fdp = p->p_fd;
180 	if (SCARG(uap, name) && (error = copyin(SCARG(uap, anamelen),
181 	    &namelen, sizeof(namelen))))
182 		return (error);
183 
184 	/* getsock() will use the descriptor for us */
185 	if ((error = getsock(fdp, SCARG(uap, s), &fp)) != 0)
186 		return (error);
187 	s = splsoftnet();
188 	so = (struct socket *)fp->f_data;
189 	FILE_UNUSE(fp, p);
190 	if (!(so->so_proto->pr_flags & PR_LISTEN)) {
191 		splx(s);
192 		return (EOPNOTSUPP);
193 	}
194 	if ((so->so_options & SO_ACCEPTCONN) == 0) {
195 		splx(s);
196 		return (EINVAL);
197 	}
198 	if ((so->so_state & SS_NBIO) && so->so_qlen == 0) {
199 		splx(s);
200 		return (EWOULDBLOCK);
201 	}
202 	while (so->so_qlen == 0 && so->so_error == 0) {
203 		if (so->so_state & SS_CANTRCVMORE) {
204 			so->so_error = ECONNABORTED;
205 			break;
206 		}
207 		error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH,
208 			       netcon, 0);
209 		if (error) {
210 			splx(s);
211 			return (error);
212 		}
213 	}
214 	if (so->so_error) {
215 		error = so->so_error;
216 		so->so_error = 0;
217 		splx(s);
218 		return (error);
219 	}
220 	fflag = fp->f_flag;
221 	/* falloc() will use the descriptor for us */
222 	if ((error = falloc(p, &fp, &fd)) != 0) {
223 		splx(s);
224 		return (error);
225 	}
226 	*retval = fd;
227 
228 	/* connection has been removed from the listen queue */
229 	KNOTE(&so->so_rcv.sb_sel.sel_klist, 0);
230 
231 	{ struct socket *aso = TAILQ_FIRST(&so->so_q);
232 	  if (soqremque(aso, 1) == 0)
233 		panic("accept");
234 	  so = aso;
235 	}
236 	fp->f_type = DTYPE_SOCKET;
237 	fp->f_flag = fflag;
238 	fp->f_ops = &socketops;
239 	fp->f_data = (caddr_t)so;
240 	FILE_UNUSE(fp, p);
241 	nam = m_get(M_WAIT, MT_SONAME);
242 	if ((error = soaccept(so, nam)) == 0 && SCARG(uap, name)) {
243 		if (namelen > nam->m_len)
244 			namelen = nam->m_len;
245 		/* SHOULD COPY OUT A CHAIN HERE */
246 		if ((error = copyout(mtod(nam, caddr_t),
247 		    (caddr_t)SCARG(uap, name), namelen)) == 0)
248 			error = copyout((caddr_t)&namelen,
249 			    (caddr_t)SCARG(uap, anamelen),
250 			    sizeof(*SCARG(uap, anamelen)));
251 	}
252 	/* if an error occurred, free the file descriptor */
253 	if (error) {
254 		fdremove(fdp, fd);
255 		ffree(fp);
256 	}
257 	m_freem(nam);
258 	splx(s);
259 	FILE_SET_MATURE(fp);
260 	return (error);
261 }
262 
263 /* ARGSUSED */
264 int
265 sys_connect(struct lwp *l, void *v, register_t *retval)
266 {
267 	struct sys_connect_args /* {
268 		syscallarg(int)				s;
269 		syscallarg(const struct sockaddr *)	name;
270 		syscallarg(unsigned int)		namelen;
271 	} */ *uap = v;
272 	struct proc	*p;
273 	struct file	*fp;
274 	struct socket	*so;
275 	struct mbuf	*nam;
276 	int		error, s;
277 	int		interrupted = 0;
278 
279 	p = l->l_proc;
280 	/* getsock() will use the descriptor for us */
281 	if ((error = getsock(p->p_fd, SCARG(uap, s), &fp)) != 0)
282 		return (error);
283 	so = (struct socket *)fp->f_data;
284 	if (so->so_state & SS_ISCONNECTING) {
285 		error = EALREADY;
286 		goto out;
287 	}
288 	error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
289 	    MT_SONAME);
290 	if (error)
291 		goto out;
292 	MCLAIM(nam, so->so_mowner);
293 	error = soconnect(so, nam, p);
294 	if (error)
295 		goto bad;
296 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
297 		m_freem(nam);
298 		error = EINPROGRESS;
299 		goto out;
300 	}
301 	s = splsoftnet();
302 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
303 		error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH,
304 			       netcon, 0);
305 		if (error) {
306 			if (error == EINTR || error == ERESTART)
307 				interrupted = 1;
308 			break;
309 		}
310 	}
311 	if (error == 0) {
312 		error = so->so_error;
313 		so->so_error = 0;
314 	}
315 	splx(s);
316  bad:
317 	if (!interrupted)
318 		so->so_state &= ~SS_ISCONNECTING;
319 	m_freem(nam);
320 	if (error == ERESTART)
321 		error = EINTR;
322  out:
323 	FILE_UNUSE(fp, p);
324 	return (error);
325 }
326 
327 int
328 sys_socketpair(struct lwp *l, void *v, register_t *retval)
329 {
330 	struct sys_socketpair_args /* {
331 		syscallarg(int)		domain;
332 		syscallarg(int)		type;
333 		syscallarg(int)		protocol;
334 		syscallarg(int *)	rsv;
335 	} */ *uap = v;
336 	struct proc *p;
337 	struct filedesc	*fdp;
338 	struct file	*fp1, *fp2;
339 	struct socket	*so1, *so2;
340 	int		fd, error, sv[2];
341 
342 	p = l->l_proc;
343 	fdp = p->p_fd;
344 	error = socreate(SCARG(uap, domain), &so1, SCARG(uap, type),
345 			 SCARG(uap, protocol), p);
346 	if (error)
347 		return (error);
348 	error = socreate(SCARG(uap, domain), &so2, SCARG(uap, type),
349 			 SCARG(uap, protocol), p);
350 	if (error)
351 		goto free1;
352 	/* falloc() will use the descriptor for us */
353 	if ((error = falloc(p, &fp1, &fd)) != 0)
354 		goto free2;
355 	sv[0] = fd;
356 	fp1->f_flag = FREAD|FWRITE;
357 	fp1->f_type = DTYPE_SOCKET;
358 	fp1->f_ops = &socketops;
359 	fp1->f_data = (caddr_t)so1;
360 	if ((error = falloc(p, &fp2, &fd)) != 0)
361 		goto free3;
362 	fp2->f_flag = FREAD|FWRITE;
363 	fp2->f_type = DTYPE_SOCKET;
364 	fp2->f_ops = &socketops;
365 	fp2->f_data = (caddr_t)so2;
366 	sv[1] = fd;
367 	if ((error = soconnect2(so1, so2)) != 0)
368 		goto free4;
369 	if (SCARG(uap, type) == SOCK_DGRAM) {
370 		/*
371 		 * Datagram socket connection is asymmetric.
372 		 */
373 		 if ((error = soconnect2(so2, so1)) != 0)
374 			goto free4;
375 	}
376 	error = copyout((caddr_t)sv, (caddr_t)SCARG(uap, rsv),
377 	    2 * sizeof(int));
378 	FILE_SET_MATURE(fp1);
379 	FILE_SET_MATURE(fp2);
380 	FILE_UNUSE(fp1, p);
381 	FILE_UNUSE(fp2, p);
382 	return (error);
383  free4:
384 	FILE_UNUSE(fp2, p);
385 	ffree(fp2);
386 	fdremove(fdp, sv[1]);
387  free3:
388 	FILE_UNUSE(fp1, p);
389 	ffree(fp1);
390 	fdremove(fdp, sv[0]);
391  free2:
392 	(void)soclose(so2);
393  free1:
394 	(void)soclose(so1);
395 	return (error);
396 }
397 
398 int
399 sys_sendto(struct lwp *l, void *v, register_t *retval)
400 {
401 	struct sys_sendto_args /* {
402 		syscallarg(int)				s;
403 		syscallarg(const void *)		buf;
404 		syscallarg(size_t)			len;
405 		syscallarg(int)				flags;
406 		syscallarg(const struct sockaddr *)	to;
407 		syscallarg(unsigned int)		tolen;
408 	} */ *uap = v;
409 	struct proc	*p;
410 	struct msghdr	msg;
411 	struct iovec	aiov;
412 
413 	p = l->l_proc;
414 	msg.msg_name = __UNCONST(SCARG(uap, to)); /* XXXUNCONST kills const */
415 	msg.msg_namelen = SCARG(uap, tolen);
416 	msg.msg_iov = &aiov;
417 	msg.msg_iovlen = 1;
418 	msg.msg_control = 0;
419 	msg.msg_flags = 0;
420 	aiov.iov_base = __UNCONST(SCARG(uap, buf)); /* XXXUNCONST kills const */
421 	aiov.iov_len = SCARG(uap, len);
422 	return (sendit(p, SCARG(uap, s), &msg, SCARG(uap, flags), retval));
423 }
424 
425 int
426 sys_sendmsg(struct lwp *l, void *v, register_t *retval)
427 {
428 	struct sys_sendmsg_args /* {
429 		syscallarg(int)				s;
430 		syscallarg(const struct msghdr *)	msg;
431 		syscallarg(int)				flags;
432 	} */ *uap = v;
433 	struct proc	*p;
434 	struct msghdr	msg;
435 	struct iovec	aiov[UIO_SMALLIOV], *iov;
436 	int		error;
437 
438 	error = copyin(SCARG(uap, msg), (caddr_t)&msg, sizeof(msg));
439 	if (error)
440 		return (error);
441 	if ((unsigned int)msg.msg_iovlen > UIO_SMALLIOV) {
442 		if ((unsigned int)msg.msg_iovlen > IOV_MAX)
443 			return (EMSGSIZE);
444 		iov = malloc(sizeof(struct iovec) * msg.msg_iovlen,
445 		    M_IOV, M_WAITOK);
446 	} else
447 		iov = aiov;
448 	if ((unsigned int)msg.msg_iovlen > 0) {
449 		error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
450 		    (size_t)(msg.msg_iovlen * sizeof(struct iovec)));
451 		if (error)
452 			goto done;
453 	}
454 	msg.msg_iov = iov;
455 	msg.msg_flags = 0;
456 	p = l->l_proc;
457 	error = sendit(p, SCARG(uap, s), &msg, SCARG(uap, flags), retval);
458 done:
459 	if (iov != aiov)
460 		free(iov, M_IOV);
461 	return (error);
462 }
463 
464 int
465 sendit(struct proc *p, int s, struct msghdr *mp, int flags, register_t *retsize)
466 {
467 	struct file	*fp;
468 	struct uio	auio;
469 	struct iovec	*iov;
470 	int		i, len, error;
471 	struct mbuf	*to, *control;
472 	struct socket	*so;
473 #ifdef KTRACE
474 	struct iovec	*ktriov;
475 #endif
476 
477 #ifdef KTRACE
478 	ktriov = NULL;
479 #endif
480 	/* getsock() will use the descriptor for us */
481 	if ((error = getsock(p->p_fd, s, &fp)) != 0)
482 		return (error);
483 	so = (struct socket *)fp->f_data;
484 	auio.uio_iov = mp->msg_iov;
485 	auio.uio_iovcnt = mp->msg_iovlen;
486 	auio.uio_segflg = UIO_USERSPACE;
487 	auio.uio_rw = UIO_WRITE;
488 	auio.uio_procp = p;
489 	auio.uio_offset = 0;			/* XXX */
490 	auio.uio_resid = 0;
491 	iov = mp->msg_iov;
492 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
493 #if 0
494 		/* cannot happen; iov_len is unsigned */
495 		if (iov->iov_len < 0) {
496 			error = EINVAL;
497 			goto out;
498 		}
499 #endif
500 		/*
501 		 * Writes return ssize_t because -1 is returned on error.
502 		 * Therefore, we must restrict the length to SSIZE_MAX to
503 		 * avoid garbage return values.
504 		 */
505 		auio.uio_resid += iov->iov_len;
506 		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
507 			error = EINVAL;
508 			goto out;
509 		}
510 	}
511 	if (mp->msg_name) {
512 		error = sockargs(&to, mp->msg_name, mp->msg_namelen,
513 				 MT_SONAME);
514 		if (error)
515 			goto out;
516 		MCLAIM(to, so->so_mowner);
517 	} else
518 		to = 0;
519 	if (mp->msg_control) {
520 		if (mp->msg_controllen < sizeof(struct cmsghdr)) {
521 			error = EINVAL;
522 			goto bad;
523 		}
524 		error = sockargs(&control, mp->msg_control,
525 				 mp->msg_controllen, MT_CONTROL);
526 		if (error)
527 			goto bad;
528 		MCLAIM(control, so->so_mowner);
529 	} else
530 		control = 0;
531 #ifdef KTRACE
532 	if (KTRPOINT(p, KTR_GENIO)) {
533 		int iovlen = auio.uio_iovcnt * sizeof(struct iovec);
534 
535 		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
536 		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
537 	}
538 #endif
539 	len = auio.uio_resid;
540 	error = (*so->so_send)(so, to, &auio, NULL, control, flags, p);
541 	if (error) {
542 		if (auio.uio_resid != len && (error == ERESTART ||
543 		    error == EINTR || error == EWOULDBLOCK))
544 			error = 0;
545 		if (error == EPIPE)
546 			psignal(p, SIGPIPE);
547 	}
548 	if (error == 0)
549 		*retsize = len - auio.uio_resid;
550 #ifdef KTRACE
551 	if (ktriov != NULL) {
552 		if (error == 0)
553 			ktrgenio(p, s, UIO_WRITE, ktriov, *retsize, error);
554 		free(ktriov, M_TEMP);
555 	}
556 #endif
557  bad:
558 	if (to)
559 		m_freem(to);
560  out:
561 	FILE_UNUSE(fp, p);
562 	return (error);
563 }
564 
565 int
566 sys_recvfrom(struct lwp *l, void *v, register_t *retval)
567 {
568 	struct sys_recvfrom_args /* {
569 		syscallarg(int)			s;
570 		syscallarg(void *)		buf;
571 		syscallarg(size_t)		len;
572 		syscallarg(int)			flags;
573 		syscallarg(struct sockaddr *)	from;
574 		syscallarg(unsigned int *)	fromlenaddr;
575 	} */ *uap = v;
576 	struct proc	*p;
577 	struct msghdr	msg;
578 	struct iovec	aiov;
579 	int		error;
580 
581 	if (SCARG(uap, fromlenaddr)) {
582 		error = copyin((caddr_t)SCARG(uap, fromlenaddr),
583 			       (caddr_t)&msg.msg_namelen,
584 			       sizeof(msg.msg_namelen));
585 		if (error)
586 			return (error);
587 	} else
588 		msg.msg_namelen = 0;
589 	msg.msg_name = (caddr_t)SCARG(uap, from);
590 	msg.msg_iov = &aiov;
591 	msg.msg_iovlen = 1;
592 	aiov.iov_base = SCARG(uap, buf);
593 	aiov.iov_len = SCARG(uap, len);
594 	msg.msg_control = 0;
595 	msg.msg_flags = SCARG(uap, flags);
596 	p = l->l_proc;
597 	return (recvit(p, SCARG(uap, s), &msg,
598 		       (caddr_t)SCARG(uap, fromlenaddr), retval));
599 }
600 
601 int
602 sys_recvmsg(struct lwp *l, void *v, register_t *retval)
603 {
604 	struct sys_recvmsg_args /* {
605 		syscallarg(int)			s;
606 		syscallarg(struct msghdr *)	msg;
607 		syscallarg(int)			flags;
608 	} */ *uap = v;
609 	struct proc	*p;
610 	struct msghdr	msg;
611 	struct iovec	aiov[UIO_SMALLIOV], *uiov, *iov;
612 	int		error;
613 
614 	error = copyin((caddr_t)SCARG(uap, msg), (caddr_t)&msg,
615 		       sizeof(msg));
616 	if (error)
617 		return (error);
618 	if ((unsigned int)msg.msg_iovlen > UIO_SMALLIOV) {
619 		if ((unsigned int)msg.msg_iovlen > IOV_MAX)
620 			return (EMSGSIZE);
621 		iov = malloc(sizeof(struct iovec) * msg.msg_iovlen,
622 		    M_IOV, M_WAITOK);
623 	} else
624 		iov = aiov;
625 	if ((unsigned int)msg.msg_iovlen > 0) {
626 		error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
627 		    (size_t)(msg.msg_iovlen * sizeof(struct iovec)));
628 		if (error)
629 			goto done;
630 	}
631 	uiov = msg.msg_iov;
632 	msg.msg_iov = iov;
633 	msg.msg_flags = SCARG(uap, flags);
634 	p = l->l_proc;
635 	if ((error = recvit(p, SCARG(uap, s), &msg, (caddr_t)0, retval)) == 0) {
636 		msg.msg_iov = uiov;
637 		error = copyout((caddr_t)&msg, (caddr_t)SCARG(uap, msg),
638 		    sizeof(msg));
639 	}
640 done:
641 	if (iov != aiov)
642 		free(iov, M_IOV);
643 	return (error);
644 }
645 
646 /*
647  * Adjust for a truncated SCM_RIGHTS control message.  This means
648  *  closing any file descriptors that aren't entirely present in the
649  *  returned buffer.  m is the mbuf holding the (already externalized)
650  *  SCM_RIGHTS message; len is the length it is being truncated to.  p
651  *  is the affected process.
652  */
653 static
654 void adjust_rights(struct mbuf *m, int len, struct proc *p)
655 {
656 	int nfd;
657 	int i;
658 	int nok;
659 	int *fdv;
660 
661 	nfd = (m->m_len - CMSG_LEN(0)) / sizeof(int);
662 	nok = (len < CMSG_LEN(0)) ? 0 : ((len - CMSG_LEN(0)) / sizeof(int));
663 	fdv = (int *) CMSG_DATA(mtod(m,struct cmsghdr *));
664 	for (i = nok; i < nfd; i++)
665 		fdrelease(p,fdv[i]);
666 }
667 
668 int
669 recvit(struct proc *p, int s, struct msghdr *mp, caddr_t namelenp,
670 	register_t *retsize)
671 {
672 	struct file	*fp;
673 	struct uio	auio;
674 	struct iovec	*iov;
675 	int		i, len, error;
676 	struct mbuf	*from, *control;
677 	struct socket	*so;
678 #ifdef KTRACE
679 	struct iovec	*ktriov;
680 #endif
681 
682 	from = 0;
683 	control = 0;
684 #ifdef KTRACE
685 	ktriov = NULL;
686 #endif
687 
688 	/* getsock() will use the descriptor for us */
689 	if ((error = getsock(p->p_fd, s, &fp)) != 0)
690 		return (error);
691 	so = (struct socket *)fp->f_data;
692 	auio.uio_iov = mp->msg_iov;
693 	auio.uio_iovcnt = mp->msg_iovlen;
694 	auio.uio_segflg = UIO_USERSPACE;
695 	auio.uio_rw = UIO_READ;
696 	auio.uio_procp = p;
697 	auio.uio_offset = 0;			/* XXX */
698 	auio.uio_resid = 0;
699 	iov = mp->msg_iov;
700 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
701 #if 0
702 		/* cannot happen iov_len is unsigned */
703 		if (iov->iov_len < 0) {
704 			error = EINVAL;
705 			goto out1;
706 		}
707 #endif
708 		/*
709 		 * Reads return ssize_t because -1 is returned on error.
710 		 * Therefore we must restrict the length to SSIZE_MAX to
711 		 * avoid garbage return values.
712 		 */
713 		auio.uio_resid += iov->iov_len;
714 		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
715 			error = EINVAL;
716 			goto out1;
717 		}
718 	}
719 #ifdef KTRACE
720 	if (KTRPOINT(p, KTR_GENIO)) {
721 		int iovlen = auio.uio_iovcnt * sizeof(struct iovec);
722 
723 		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
724 		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
725 	}
726 #endif
727 	len = auio.uio_resid;
728 	error = (*so->so_receive)(so, &from, &auio, NULL,
729 			  mp->msg_control ? &control : NULL, &mp->msg_flags);
730 	if (error) {
731 		if (auio.uio_resid != len && (error == ERESTART ||
732 		    error == EINTR || error == EWOULDBLOCK))
733 			error = 0;
734 	}
735 #ifdef KTRACE
736 	if (ktriov != NULL) {
737 		if (error == 0)
738 			ktrgenio(p, s, UIO_READ, ktriov,
739 			    len - auio.uio_resid, error);
740 		free(ktriov, M_TEMP);
741 	}
742 #endif
743 	if (error)
744 		goto out;
745 	*retsize = len - auio.uio_resid;
746 	if (mp->msg_name) {
747 		len = mp->msg_namelen;
748 		if (len <= 0 || from == 0)
749 			len = 0;
750 		else {
751 			if (len > from->m_len)
752 				len = from->m_len;
753 			/* else if len < from->m_len ??? */
754 			error = copyout(mtod(from, caddr_t),
755 					(caddr_t)mp->msg_name, (unsigned)len);
756 			if (error)
757 				goto out;
758 		}
759 		mp->msg_namelen = len;
760 		if (namelenp &&
761 		    (error = copyout((caddr_t)&len, namelenp, sizeof(int))))
762 			goto out;
763 	}
764 	if (mp->msg_control) {
765 		len = mp->msg_controllen;
766 		if (len <= 0 || control == 0)
767 			len = 0;
768 		else {
769 			struct mbuf *m = control;
770 			caddr_t q = (caddr_t)mp->msg_control;
771 
772 			do {
773 				i = m->m_len;
774 				if (len < i) {
775 					mp->msg_flags |= MSG_CTRUNC;
776 					i = len;
777 					if (mtod(m, struct cmsghdr *)->
778 					    cmsg_type == SCM_RIGHTS)
779 						adjust_rights(m, len, p);
780 				}
781 				error = copyout(mtod(m, caddr_t), q,
782 				    (unsigned)i);
783 				m = m->m_next;
784 				if (m)
785 					i = ALIGN(i);
786 				q += i;
787 				len -= i;
788 				if (error != 0 || len <= 0)
789 					break;
790 			} while (m != NULL);
791 			while (m) {
792 				if (mtod(m, struct cmsghdr *)->
793 				    cmsg_type == SCM_RIGHTS)
794 					adjust_rights(m, 0, p);
795 				m = m->m_next;
796 			}
797 			len = q - (caddr_t)mp->msg_control;
798 		}
799 		mp->msg_controllen = len;
800 	}
801  out:
802 	if (from)
803 		m_freem(from);
804 	if (control)
805 		m_freem(control);
806  out1:
807 	FILE_UNUSE(fp, p);
808 	return (error);
809 }
810 
811 /* ARGSUSED */
812 int
813 sys_shutdown(struct lwp *l, void *v, register_t *retval)
814 {
815 	struct sys_shutdown_args /* {
816 		syscallarg(int)	s;
817 		syscallarg(int)	how;
818 	} */ *uap = v;
819 	struct proc	*p;
820 	struct file	*fp;
821 	int		error;
822 
823 	p = l->l_proc;
824 	/* getsock() will use the descriptor for us */
825 	if ((error = getsock(p->p_fd, SCARG(uap, s), &fp)) != 0)
826 		return (error);
827 	error = soshutdown((struct socket *)fp->f_data, SCARG(uap, how));
828 	FILE_UNUSE(fp, p);
829 	return (error);
830 }
831 
832 /* ARGSUSED */
833 int
834 sys_setsockopt(struct lwp *l, void *v, register_t *retval)
835 {
836 	struct sys_setsockopt_args /* {
837 		syscallarg(int)			s;
838 		syscallarg(int)			level;
839 		syscallarg(int)			name;
840 		syscallarg(const void *)	val;
841 		syscallarg(unsigned int)	valsize;
842 	} */ *uap = v;
843 	struct proc	*p;
844 	struct file	*fp;
845 	struct mbuf	*m;
846 	struct socket	*so;
847 	int		error;
848 	unsigned int	len;
849 
850 	p = l->l_proc;
851 	m = NULL;
852 	/* getsock() will use the descriptor for us */
853 	if ((error = getsock(p->p_fd, SCARG(uap, s), &fp)) != 0)
854 		return (error);
855 	so = (struct socket *)fp->f_data;
856 	len = SCARG(uap, valsize);
857 	if (len > MCLBYTES) {
858 		error = EINVAL;
859 		goto out;
860 	}
861 	if (SCARG(uap, val)) {
862 		m = m_get(M_WAIT, MT_SOOPTS);
863 		MCLAIM(m, so->so_mowner);
864 		if (len > MLEN)
865 			m_clget(m, M_WAIT);
866 		error = copyin(SCARG(uap, val), mtod(m, caddr_t), len);
867 		if (error) {
868 			(void) m_free(m);
869 			goto out;
870 		}
871 		m->m_len = SCARG(uap, valsize);
872 	}
873 	error = sosetopt(so, SCARG(uap, level), SCARG(uap, name), m);
874  out:
875 	FILE_UNUSE(fp, p);
876 	return (error);
877 }
878 
879 /* ARGSUSED */
880 int
881 sys_getsockopt(struct lwp *l, void *v, register_t *retval)
882 {
883 	struct sys_getsockopt_args /* {
884 		syscallarg(int)			s;
885 		syscallarg(int)			level;
886 		syscallarg(int)			name;
887 		syscallarg(void *)		val;
888 		syscallarg(unsigned int *)	avalsize;
889 	} */ *uap = v;
890 	struct proc	*p;
891 	struct file	*fp;
892 	struct mbuf	*m;
893 	unsigned int	op, i, valsize;
894 	int		error;
895 
896 	p = l->l_proc;
897 	m = NULL;
898 	/* getsock() will use the descriptor for us */
899 	if ((error = getsock(p->p_fd, SCARG(uap, s), &fp)) != 0)
900 		return (error);
901 	if (SCARG(uap, val)) {
902 		error = copyin((caddr_t)SCARG(uap, avalsize),
903 			       (caddr_t)&valsize, sizeof(valsize));
904 		if (error)
905 			goto out;
906 	} else
907 		valsize = 0;
908 	if ((error = sogetopt((struct socket *)fp->f_data, SCARG(uap, level),
909 	    SCARG(uap, name), &m)) == 0 && SCARG(uap, val) && valsize &&
910 	    m != NULL) {
911 		op = 0;
912 		while (m && !error && op < valsize) {
913 			i = min(m->m_len, (valsize - op));
914 			error = copyout(mtod(m, caddr_t), SCARG(uap, val), i);
915 			op += i;
916 			SCARG(uap, val) = ((u_int8_t *)SCARG(uap, val)) + i;
917 			m = m_free(m);
918 		}
919 		valsize = op;
920 		if (error == 0)
921 			error = copyout(&valsize,
922 					SCARG(uap, avalsize), sizeof(valsize));
923 	}
924 	if (m != NULL)
925 		(void) m_freem(m);
926  out:
927 	FILE_UNUSE(fp, p);
928 	return (error);
929 }
930 
931 #ifdef PIPE_SOCKETPAIR
932 /* ARGSUSED */
933 int
934 sys_pipe(struct lwp *l, void *v, register_t *retval)
935 {
936 	struct proc	*p;
937 	struct filedesc	*fdp;
938 	struct file	*rf, *wf;
939 	struct socket	*rso, *wso;
940 	int		fd, error;
941 
942 	p = l->l_proc;
943 	fdp = p->p_fd;
944 	if ((error = socreate(AF_LOCAL, &rso, SOCK_STREAM, 0, p)) != 0)
945 		return (error);
946 	if ((error = socreate(AF_LOCAL, &wso, SOCK_STREAM, 0, p)) != 0)
947 		goto free1;
948 	/* remember this socket pair implements a pipe */
949 	wso->so_state |= SS_ISAPIPE;
950 	rso->so_state |= SS_ISAPIPE;
951 	/* falloc() will use the descriptor for us */
952 	if ((error = falloc(p, &rf, &fd)) != 0)
953 		goto free2;
954 	retval[0] = fd;
955 	rf->f_flag = FREAD;
956 	rf->f_type = DTYPE_SOCKET;
957 	rf->f_ops = &socketops;
958 	rf->f_data = (caddr_t)rso;
959 	if ((error = falloc(p, &wf, &fd)) != 0)
960 		goto free3;
961 	wf->f_flag = FWRITE;
962 	wf->f_type = DTYPE_SOCKET;
963 	wf->f_ops = &socketops;
964 	wf->f_data = (caddr_t)wso;
965 	retval[1] = fd;
966 	if ((error = unp_connect2(wso, rso, PRU_CONNECT2)) != 0)
967 		goto free4;
968 	FILE_SET_MATURE(rf);
969 	FILE_SET_MATURE(wf);
970 	FILE_UNUSE(rf, p);
971 	FILE_UNUSE(wf, p);
972 	return (0);
973  free4:
974 	FILE_UNUSE(wf, p);
975 	ffree(wf);
976 	fdremove(fdp, retval[1]);
977  free3:
978 	FILE_UNUSE(rf, p);
979 	ffree(rf);
980 	fdremove(fdp, retval[0]);
981  free2:
982 	(void)soclose(wso);
983  free1:
984 	(void)soclose(rso);
985 	return (error);
986 }
987 #endif /* PIPE_SOCKETPAIR */
988 
989 /*
990  * Get socket name.
991  */
992 /* ARGSUSED */
993 int
994 sys_getsockname(struct lwp *l, void *v, register_t *retval)
995 {
996 	struct sys_getsockname_args /* {
997 		syscallarg(int)			fdes;
998 		syscallarg(struct sockaddr *)	asa;
999 		syscallarg(unsigned int *)	alen;
1000 	} */ *uap = v;
1001 	struct proc	*p;
1002 	struct file	*fp;
1003 	struct socket	*so;
1004 	struct mbuf	*m;
1005 	unsigned int	len;
1006 	int		error;
1007 
1008 	p = l->l_proc;
1009 	/* getsock() will use the descriptor for us */
1010 	if ((error = getsock(p->p_fd, SCARG(uap, fdes), &fp)) != 0)
1011 		return (error);
1012 	error = copyin((caddr_t)SCARG(uap, alen), (caddr_t)&len, sizeof(len));
1013 	if (error)
1014 		goto out;
1015 	so = (struct socket *)fp->f_data;
1016 	m = m_getclr(M_WAIT, MT_SONAME);
1017 	MCLAIM(m, so->so_mowner);
1018 	error = (*so->so_proto->pr_usrreq)(so, PRU_SOCKADDR, (struct mbuf *)0,
1019 	    m, (struct mbuf *)0, (struct proc *)0);
1020 	if (error)
1021 		goto bad;
1022 	if (len > m->m_len)
1023 		len = m->m_len;
1024 	error = copyout(mtod(m, caddr_t), (caddr_t)SCARG(uap, asa), len);
1025 	if (error == 0)
1026 		error = copyout((caddr_t)&len, (caddr_t)SCARG(uap, alen),
1027 		    sizeof(len));
1028  bad:
1029 	m_freem(m);
1030  out:
1031 	FILE_UNUSE(fp, p);
1032 	return (error);
1033 }
1034 
1035 /*
1036  * Get name of peer for connected socket.
1037  */
1038 /* ARGSUSED */
1039 int
1040 sys_getpeername(struct lwp *l, void *v, register_t *retval)
1041 {
1042 	struct sys_getpeername_args /* {
1043 		syscallarg(int)			fdes;
1044 		syscallarg(struct sockaddr *)	asa;
1045 		syscallarg(unsigned int *)	alen;
1046 	} */ *uap = v;
1047 	struct proc	*p;
1048 	struct file	*fp;
1049 	struct socket	*so;
1050 	struct mbuf	*m;
1051 	unsigned int	len;
1052 	int		error;
1053 
1054 	p = l->l_proc;
1055 	/* getsock() will use the descriptor for us */
1056 	if ((error = getsock(p->p_fd, SCARG(uap, fdes), &fp)) != 0)
1057 		return (error);
1058 	so = (struct socket *)fp->f_data;
1059 	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1060 		error = ENOTCONN;
1061 		goto out;
1062 	}
1063 	error = copyin((caddr_t)SCARG(uap, alen), (caddr_t)&len, sizeof(len));
1064 	if (error)
1065 		goto out;
1066 	m = m_getclr(M_WAIT, MT_SONAME);
1067 	MCLAIM(m, so->so_mowner);
1068 	error = (*so->so_proto->pr_usrreq)(so, PRU_PEERADDR, (struct mbuf *)0,
1069 	    m, (struct mbuf *)0, (struct proc *)0);
1070 	if (error)
1071 		goto bad;
1072 	if (len > m->m_len)
1073 		len = m->m_len;
1074 	error = copyout(mtod(m, caddr_t), (caddr_t)SCARG(uap, asa), len);
1075 	if (error)
1076 		goto bad;
1077 	error = copyout((caddr_t)&len, (caddr_t)SCARG(uap, alen), sizeof(len));
1078  bad:
1079 	m_freem(m);
1080  out:
1081 	FILE_UNUSE(fp, p);
1082 	return (error);
1083 }
1084 
1085 /*
1086  * XXX In a perfect world, we wouldn't pass around socket control
1087  * XXX arguments in mbufs, and this could go away.
1088  */
1089 int
1090 sockargs(struct mbuf **mp, const void *bf, size_t buflen, int type)
1091 {
1092 	struct sockaddr	*sa;
1093 	struct mbuf	*m;
1094 	int		error;
1095 
1096 	/*
1097 	 * We can't allow socket names > UCHAR_MAX in length, since that
1098 	 * will overflow sa_len.  Control data more than a page size in
1099 	 * length is just too much.
1100 	 */
1101 	if (buflen > (type == MT_SONAME ? UCHAR_MAX : PAGE_SIZE))
1102 		return (EINVAL);
1103 
1104 	/* Allocate an mbuf to hold the arguments. */
1105 	m = m_get(M_WAIT, type);
1106 	/* can't claim.  don't who to assign it to. */
1107 	if (buflen > MLEN) {
1108 		/*
1109 		 * Won't fit into a regular mbuf, so we allocate just
1110 		 * enough external storage to hold the argument.
1111 		 */
1112 		MEXTMALLOC(m, buflen, M_WAITOK);
1113 	}
1114 	m->m_len = buflen;
1115 	error = copyin(bf, mtod(m, caddr_t), buflen);
1116 	if (error) {
1117 		(void) m_free(m);
1118 		return (error);
1119 	}
1120 	*mp = m;
1121 	if (type == MT_SONAME) {
1122 		sa = mtod(m, struct sockaddr *);
1123 #if BYTE_ORDER != BIG_ENDIAN
1124 		/*
1125 		 * 4.3BSD compat thing - need to stay, since bind(2),
1126 		 * connect(2), sendto(2) were not versioned for COMPAT_43.
1127 		 */
1128 		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1129 			sa->sa_family = sa->sa_len;
1130 #endif
1131 		sa->sa_len = buflen;
1132 	}
1133 	return (0);
1134 }
1135 
1136 int
1137 getsock(struct filedesc *fdp, int fdes, struct file **fpp)
1138 {
1139 	struct file	*fp;
1140 
1141 	if ((fp = fd_getfile(fdp, fdes)) == NULL)
1142 		return (EBADF);
1143 
1144 	FILE_USE(fp);
1145 
1146 	if (fp->f_type != DTYPE_SOCKET) {
1147 		FILE_UNUSE(fp, NULL);
1148 		return (ENOTSOCK);
1149 	}
1150 	*fpp = fp;
1151 	return (0);
1152 }
1153