xref: /netbsd-src/sys/kern/uipc_syscalls.c (revision 8b0f9554ff8762542c4defc4f70e1eb76fb508fa)
1 /*	$NetBSD: uipc_syscalls.c,v 1.123 2007/11/24 07:49:04 dyoung Exp $	*/
2 
3 /*
4  * Copyright (c) 1982, 1986, 1989, 1990, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  *	@(#)uipc_syscalls.c	8.6 (Berkeley) 2/14/95
32  */
33 
34 #include <sys/cdefs.h>
35 __KERNEL_RCSID(0, "$NetBSD: uipc_syscalls.c,v 1.123 2007/11/24 07:49:04 dyoung Exp $");
36 
37 #include "opt_pipe.h"
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/filedesc.h>
42 #include <sys/proc.h>
43 #include <sys/file.h>
44 #include <sys/buf.h>
45 #include <sys/malloc.h>
46 #include <sys/mbuf.h>
47 #include <sys/protosw.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/signalvar.h>
51 #include <sys/un.h>
52 #include <sys/ktrace.h>
53 #include <sys/event.h>
54 
55 #include <sys/mount.h>
56 #include <sys/syscallargs.h>
57 
58 #include <uvm/uvm_extern.h>
59 
60 /*
61  * System call interface to the socket abstraction.
62  */
63 extern const struct fileops socketops;
64 
65 int
66 sys___socket30(struct lwp *l, void *v, register_t *retval)
67 {
68 	struct sys___socket30_args /* {
69 		syscallarg(int)	domain;
70 		syscallarg(int)	type;
71 		syscallarg(int)	protocol;
72 	} */ *uap = v;
73 	int		fd, error;
74 
75 	error = fsocreate(SCARG(uap, domain), NULL, SCARG(uap, type),
76 			 SCARG(uap, protocol), l, &fd);
77 	if (error == 0)
78 		*retval = fd;
79 	return error;
80 }
81 
82 /* ARGSUSED */
83 int
84 sys_bind(struct lwp *l, void *v, register_t *retval)
85 {
86 	struct sys_bind_args /* {
87 		syscallarg(int)				s;
88 		syscallarg(const struct sockaddr *)	name;
89 		syscallarg(unsigned int)		namelen;
90 	} */ *uap = v;
91 	struct mbuf	*nam;
92 	int		error;
93 
94 	error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
95 	    MT_SONAME);
96 	if (error)
97 		return error;
98 
99 	return do_sys_bind(l, SCARG(uap, s), nam);
100 }
101 
102 int
103 do_sys_bind(struct lwp *l, int s, struct mbuf *nam)
104 {
105 	struct file	*fp;
106 	int		error;
107 
108 	/* getsock() will use the descriptor for us */
109 	if ((error = getsock(l->l_proc->p_fd, s, &fp)) != 0) {
110 		m_freem(nam);
111 		return (error);
112 	}
113 	MCLAIM(nam, ((struct socket *)fp->f_data)->so_mowner);
114 	error = sobind(fp->f_data, nam, l);
115 	m_freem(nam);
116 	FILE_UNUSE(fp, l);
117 	return error;
118 }
119 
120 /* ARGSUSED */
121 int
122 sys_listen(struct lwp *l, void *v, register_t *retval)
123 {
124 	struct sys_listen_args /* {
125 		syscallarg(int)	s;
126 		syscallarg(int)	backlog;
127 	} */ *uap = v;
128 	struct file	*fp;
129 	int		error;
130 
131 	/* getsock() will use the descriptor for us */
132 	if ((error = getsock(l->l_proc->p_fd, SCARG(uap, s), &fp)) != 0)
133 		return (error);
134 	error = solisten(fp->f_data, SCARG(uap, backlog));
135 	FILE_UNUSE(fp, l);
136 	return error;
137 }
138 
139 int
140 do_sys_accept(struct lwp *l, int sock, struct mbuf **name, register_t *new_sock)
141 {
142 	struct filedesc	*fdp;
143 	struct file	*fp;
144 	struct mbuf	*nam;
145 	int		error, s, fd;
146 	struct socket	*so;
147 	int		fflag;
148 
149 	fdp = l->l_proc->p_fd;
150 
151 	/* getsock() will use the descriptor for us */
152 	if ((error = getsock(fdp, sock, &fp)) != 0)
153 		return (error);
154 	s = splsoftnet();
155 	so = (struct socket *)fp->f_data;
156 	FILE_UNUSE(fp, l);
157 	if (!(so->so_proto->pr_flags & PR_LISTEN)) {
158 		splx(s);
159 		return (EOPNOTSUPP);
160 	}
161 	if ((so->so_options & SO_ACCEPTCONN) == 0) {
162 		splx(s);
163 		return (EINVAL);
164 	}
165 	if ((so->so_state & SS_NBIO) && so->so_qlen == 0) {
166 		splx(s);
167 		return (EWOULDBLOCK);
168 	}
169 	while (so->so_qlen == 0 && so->so_error == 0) {
170 		if (so->so_state & SS_CANTRCVMORE) {
171 			so->so_error = ECONNABORTED;
172 			break;
173 		}
174 		error = tsleep(&so->so_timeo, PSOCK | PCATCH,
175 		    netcon, 0);
176 		if (error) {
177 			splx(s);
178 			return (error);
179 		}
180 	}
181 	if (so->so_error) {
182 		error = so->so_error;
183 		so->so_error = 0;
184 		splx(s);
185 		return (error);
186 	}
187 	fflag = fp->f_flag;
188 	/* falloc() will use the descriptor for us */
189 	if ((error = falloc(l, &fp, &fd)) != 0) {
190 		splx(s);
191 		return (error);
192 	}
193 	*new_sock = fd;
194 
195 	/* connection has been removed from the listen queue */
196 	KNOTE(&so->so_rcv.sb_sel.sel_klist, 0);
197 
198 	{ struct socket *aso = TAILQ_FIRST(&so->so_q);
199 	  if (soqremque(aso, 1) == 0)
200 		panic("accept");
201 	  so = aso;
202 	}
203 	fp->f_type = DTYPE_SOCKET;
204 	fp->f_flag = fflag;
205 	fp->f_ops = &socketops;
206 	fp->f_data = so;
207 	nam = m_get(M_WAIT, MT_SONAME);
208 	error = soaccept(so, nam);
209 
210 	if (error) {
211 		/* an error occurred, free the file descriptor and mbuf */
212 		m_freem(nam);
213 		fdremove(fdp, fd);
214 		closef(fp, l);
215 	} else {
216 		FILE_SET_MATURE(fp);
217 		FILE_UNUSE(fp, l);
218 		*name = nam;
219 	}
220 	splx(s);
221 	return (error);
222 }
223 
224 int
225 sys_accept(struct lwp *l, void *v, register_t *retval)
226 {
227 	struct sys_accept_args /* {
228 		syscallarg(int)			s;
229 		syscallarg(struct sockaddr *)	name;
230 		syscallarg(unsigned int *)	anamelen;
231 	} */ *uap = v;
232 	int error;
233 	struct mbuf *name;
234 
235 	error = do_sys_accept(l, SCARG(uap, s), &name, retval);
236 	if (error != 0)
237 		return error;
238 
239 	error = copyout_sockname(SCARG(uap, name), SCARG(uap, anamelen),
240 	    MSG_LENUSRSPACE, name);
241 	if (name != NULL)
242 		m_free(name);
243 	if (error != 0)
244 		fdrelease(l, *retval);
245 	return error;
246 }
247 
248 /* ARGSUSED */
249 int
250 sys_connect(struct lwp *l, void *v, register_t *retval)
251 {
252 	struct sys_connect_args /* {
253 		syscallarg(int)				s;
254 		syscallarg(const struct sockaddr *)	name;
255 		syscallarg(unsigned int)		namelen;
256 	} */ *uap = v;
257 	int		error;
258 	struct mbuf	*nam;
259 
260 	error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
261 	    MT_SONAME);
262 	if (error)
263 		return error;
264 	return do_sys_connect(l,  SCARG(uap, s), nam);
265 }
266 
267 int
268 do_sys_connect(struct lwp *l, int s, struct mbuf *nam)
269 {
270 	struct file	*fp;
271 	struct socket	*so;
272 	int		error;
273 	int		interrupted = 0;
274 
275 	/* getsock() will use the descriptor for us */
276 	if ((error = getsock(l->l_proc->p_fd, s, &fp)) != 0) {
277 		m_freem(nam);
278 		return (error);
279 	}
280 	so = fp->f_data;
281 	MCLAIM(nam, so->so_mowner);
282 	if (so->so_state & SS_ISCONNECTING) {
283 		error = EALREADY;
284 		goto out;
285 	}
286 
287 	error = soconnect(so, nam, l);
288 	if (error)
289 		goto bad;
290 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
291 		error = EINPROGRESS;
292 		goto out;
293 	}
294 	s = splsoftnet();
295 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
296 		error = tsleep(&so->so_timeo, PSOCK | PCATCH,
297 			       netcon, 0);
298 		if (error) {
299 			if (error == EINTR || error == ERESTART)
300 				interrupted = 1;
301 			break;
302 		}
303 	}
304 	if (error == 0) {
305 		error = so->so_error;
306 		so->so_error = 0;
307 	}
308 	splx(s);
309  bad:
310 	if (!interrupted)
311 		so->so_state &= ~SS_ISCONNECTING;
312 	if (error == ERESTART)
313 		error = EINTR;
314  out:
315 	FILE_UNUSE(fp, l);
316 	m_freem(nam);
317 	return (error);
318 }
319 
320 int
321 sys_socketpair(struct lwp *l, void *v, register_t *retval)
322 {
323 	struct sys_socketpair_args /* {
324 		syscallarg(int)		domain;
325 		syscallarg(int)		type;
326 		syscallarg(int)		protocol;
327 		syscallarg(int *)	rsv;
328 	} */ *uap = v;
329 	struct filedesc	*fdp;
330 	struct file	*fp1, *fp2;
331 	struct socket	*so1, *so2;
332 	int		fd, error, sv[2];
333 
334 	fdp = l->l_proc->p_fd;
335 	error = socreate(SCARG(uap, domain), &so1, SCARG(uap, type),
336 	    SCARG(uap, protocol), l);
337 	if (error)
338 		return (error);
339 	error = socreate(SCARG(uap, domain), &so2, SCARG(uap, type),
340 	    SCARG(uap, protocol), l);
341 	if (error)
342 		goto free1;
343 	/* falloc() will use the descriptor for us */
344 	if ((error = falloc(l, &fp1, &fd)) != 0)
345 		goto free2;
346 	sv[0] = fd;
347 	fp1->f_flag = FREAD|FWRITE;
348 	fp1->f_type = DTYPE_SOCKET;
349 	fp1->f_ops = &socketops;
350 	fp1->f_data = so1;
351 	if ((error = falloc(l, &fp2, &fd)) != 0)
352 		goto free3;
353 	fp2->f_flag = FREAD|FWRITE;
354 	fp2->f_type = DTYPE_SOCKET;
355 	fp2->f_ops = &socketops;
356 	fp2->f_data = so2;
357 	sv[1] = fd;
358 	if ((error = soconnect2(so1, so2)) != 0)
359 		goto free4;
360 	if (SCARG(uap, type) == SOCK_DGRAM) {
361 		/*
362 		 * Datagram socket connection is asymmetric.
363 		 */
364 		 if ((error = soconnect2(so2, so1)) != 0)
365 			goto free4;
366 	}
367 	error = copyout(sv, SCARG(uap, rsv), 2 * sizeof(int));
368 	FILE_SET_MATURE(fp1);
369 	FILE_SET_MATURE(fp2);
370 	FILE_UNUSE(fp1, l);
371 	FILE_UNUSE(fp2, l);
372 	return (error);
373  free4:
374 	FILE_UNUSE(fp2, l);
375 	ffree(fp2);
376 	fdremove(fdp, sv[1]);
377  free3:
378 	FILE_UNUSE(fp1, l);
379 	ffree(fp1);
380 	fdremove(fdp, sv[0]);
381  free2:
382 	(void)soclose(so2);
383  free1:
384 	(void)soclose(so1);
385 	return (error);
386 }
387 
388 int
389 sys_sendto(struct lwp *l, void *v, register_t *retval)
390 {
391 	struct sys_sendto_args /* {
392 		syscallarg(int)				s;
393 		syscallarg(const void *)		buf;
394 		syscallarg(size_t)			len;
395 		syscallarg(int)				flags;
396 		syscallarg(const struct sockaddr *)	to;
397 		syscallarg(unsigned int)		tolen;
398 	} */ *uap = v;
399 	struct msghdr	msg;
400 	struct iovec	aiov;
401 
402 	msg.msg_name = __UNCONST(SCARG(uap, to)); /* XXXUNCONST kills const */
403 	msg.msg_namelen = SCARG(uap, tolen);
404 	msg.msg_iov = &aiov;
405 	msg.msg_iovlen = 1;
406 	msg.msg_control = NULL;
407 	msg.msg_flags = 0;
408 	aiov.iov_base = __UNCONST(SCARG(uap, buf)); /* XXXUNCONST kills const */
409 	aiov.iov_len = SCARG(uap, len);
410 	return do_sys_sendmsg(l, SCARG(uap, s), &msg, SCARG(uap, flags), retval);
411 }
412 
413 int
414 sys_sendmsg(struct lwp *l, void *v, register_t *retval)
415 {
416 	struct sys_sendmsg_args /* {
417 		syscallarg(int)				s;
418 		syscallarg(const struct msghdr *)	msg;
419 		syscallarg(int)				flags;
420 	} */ *uap = v;
421 	struct msghdr	msg;
422 	int		error;
423 
424 	error = copyin(SCARG(uap, msg), &msg, sizeof(msg));
425 	if (error)
426 		return (error);
427 
428 	msg.msg_flags = MSG_IOVUSRSPACE;
429 	return do_sys_sendmsg(l, SCARG(uap, s), &msg, SCARG(uap, flags), retval);
430 }
431 
432 int
433 do_sys_sendmsg(struct lwp *l, int s, struct msghdr *mp, int flags,
434 		register_t *retsize)
435 {
436 	struct file	*fp;
437 	struct uio	auio;
438 	int		i, len, error, iovlen;
439 	struct mbuf	*to, *control;
440 	struct socket	*so;
441 	struct iovec	*tiov;
442 	struct iovec	aiov[UIO_SMALLIOV], *iov = aiov;
443 	struct iovec	*ktriov = NULL;
444 
445 	ktrkuser("msghdr", mp, sizeof *mp);
446 
447 	/* If the caller passed us stuff in mbufs, we must free them */
448 	if (mp->msg_flags & MSG_NAMEMBUF)
449 		to = mp->msg_name;
450 	else
451 		to = NULL;
452 
453 	if (mp->msg_flags & MSG_CONTROLMBUF)
454 		control = mp->msg_control;
455 	else
456 		control = NULL;
457 
458 	if (mp->msg_flags & MSG_IOVUSRSPACE) {
459 		if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) {
460 			if ((unsigned int)mp->msg_iovlen > IOV_MAX) {
461 				error = EMSGSIZE;
462 				goto bad;
463 			}
464 			iov = malloc(sizeof(struct iovec) * mp->msg_iovlen,
465 			    M_IOV, M_WAITOK);
466 		}
467 		if (mp->msg_iovlen != 0) {
468 			error = copyin(mp->msg_iov, iov,
469 			    (size_t)(mp->msg_iovlen * sizeof(struct iovec)));
470 			if (error)
471 				goto bad;
472 		}
473 		mp->msg_iov = iov;
474 	}
475 
476 	auio.uio_iov = mp->msg_iov;
477 	auio.uio_iovcnt = mp->msg_iovlen;
478 	auio.uio_rw = UIO_WRITE;
479 	auio.uio_offset = 0;			/* XXX */
480 	auio.uio_resid = 0;
481 	KASSERT(l == curlwp);
482 	auio.uio_vmspace = l->l_proc->p_vmspace;
483 
484 	for (i = 0, tiov = mp->msg_iov; i < mp->msg_iovlen; i++, tiov++) {
485 #if 0
486 		/* cannot happen; iov_len is unsigned */
487 		if (tiov->iov_len < 0) {
488 			error = EINVAL;
489 			goto bad;
490 		}
491 #endif
492 		/*
493 		 * Writes return ssize_t because -1 is returned on error.
494 		 * Therefore, we must restrict the length to SSIZE_MAX to
495 		 * avoid garbage return values.
496 		 */
497 		auio.uio_resid += tiov->iov_len;
498 		if (tiov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
499 			error = EINVAL;
500 			goto bad;
501 		}
502 	}
503 
504 	if (mp->msg_name && to == NULL) {
505 		error = sockargs(&to, mp->msg_name, mp->msg_namelen,
506 		    MT_SONAME);
507 		if (error)
508 			goto bad;
509 	}
510 
511 	if (mp->msg_control) {
512 		if (mp->msg_controllen < CMSG_ALIGN(sizeof(struct cmsghdr))) {
513 			error = EINVAL;
514 			goto bad;
515 		}
516 		if (control == NULL) {
517 			error = sockargs(&control, mp->msg_control,
518 			    mp->msg_controllen, MT_CONTROL);
519 			if (error)
520 				goto bad;
521 		}
522 	}
523 
524 	if (ktrpoint(KTR_GENIO)) {
525 		iovlen = auio.uio_iovcnt * sizeof(struct iovec);
526 		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
527 		memcpy(ktriov, auio.uio_iov, iovlen);
528 	}
529 
530 	/* getsock() will use the descriptor for us */
531 	if ((error = getsock(l->l_proc->p_fd, s, &fp)) != 0)
532 		goto bad;
533 	so = (struct socket *)fp->f_data;
534 
535 	if (mp->msg_name)
536 		MCLAIM(to, so->so_mowner);
537 	if (mp->msg_control)
538 		MCLAIM(control, so->so_mowner);
539 
540 	len = auio.uio_resid;
541 	error = (*so->so_send)(so, to, &auio, NULL, control, flags, l);
542 	/* Protocol is responsible for freeing 'control' */
543 	control = NULL;
544 
545 	FILE_UNUSE(fp, l);
546 
547 	if (error) {
548 		if (auio.uio_resid != len && (error == ERESTART ||
549 		    error == EINTR || error == EWOULDBLOCK))
550 			error = 0;
551 		if (error == EPIPE && (flags & MSG_NOSIGNAL) == 0) {
552 			mutex_enter(&proclist_mutex);
553 			psignal(l->l_proc, SIGPIPE);
554 			mutex_exit(&proclist_mutex);
555 		}
556 	}
557 	if (error == 0)
558 		*retsize = len - auio.uio_resid;
559 
560 bad:
561 	if (ktriov != NULL) {
562 		ktrgeniov(s, UIO_WRITE, ktriov, *retsize, error);
563 		free(ktriov, M_TEMP);
564 	}
565 
566  	if (iov != aiov)
567 		free(iov, M_IOV);
568 	if (to)
569 		m_freem(to);
570 	if (control)
571 		m_freem(control);
572 
573 	return (error);
574 }
575 
576 int
577 sys_recvfrom(struct lwp *l, void *v, register_t *retval)
578 {
579 	struct sys_recvfrom_args /* {
580 		syscallarg(int)			s;
581 		syscallarg(void *)		buf;
582 		syscallarg(size_t)		len;
583 		syscallarg(int)			flags;
584 		syscallarg(struct sockaddr *)	from;
585 		syscallarg(unsigned int *)	fromlenaddr;
586 	} */ *uap = v;
587 	struct msghdr	msg;
588 	struct iovec	aiov;
589 	int		error;
590 	struct mbuf	*from;
591 
592 	msg.msg_name = NULL;
593 	msg.msg_iov = &aiov;
594 	msg.msg_iovlen = 1;
595 	aiov.iov_base = SCARG(uap, buf);
596 	aiov.iov_len = SCARG(uap, len);
597 	msg.msg_control = NULL;
598 	msg.msg_flags = SCARG(uap, flags) & MSG_USERFLAGS;
599 
600 	error = do_sys_recvmsg(l, SCARG(uap, s), &msg, &from, NULL, retval);
601 	if (error != 0)
602 		return error;
603 
604 	error = copyout_sockname(SCARG(uap, from), SCARG(uap, fromlenaddr),
605 	    MSG_LENUSRSPACE, from);
606 	if (from != NULL)
607 		m_free(from);
608 	return error;
609 }
610 
611 int
612 sys_recvmsg(struct lwp *l, void *v, register_t *retval)
613 {
614 	struct sys_recvmsg_args /* {
615 		syscallarg(int)			s;
616 		syscallarg(struct msghdr *)	msg;
617 		syscallarg(int)			flags;
618 	} */ *uap = v;
619 	struct msghdr	msg;
620 	int		error;
621 	struct mbuf	*from, *control;
622 
623 	error = copyin(SCARG(uap, msg), &msg, sizeof(msg));
624 	if (error)
625 		return (error);
626 
627 	msg.msg_flags = (SCARG(uap, flags) & MSG_USERFLAGS) | MSG_IOVUSRSPACE;
628 
629 	error = do_sys_recvmsg(l, SCARG(uap, s), &msg, &from,
630 	    msg.msg_control != NULL ? &control : NULL, retval);
631 	if (error != 0)
632 		return error;
633 
634 	if (msg.msg_control != NULL)
635 		error = copyout_msg_control(l, &msg, control);
636 
637 	if (error == 0)
638 		error = copyout_sockname(msg.msg_name, &msg.msg_namelen, 0,
639 			from);
640 	if (from != NULL)
641 		m_free(from);
642 	if (error == 0) {
643 		ktrkuser("msghdr", &msg, sizeof msg);
644 		error = copyout(&msg, SCARG(uap, msg), sizeof(msg));
645 	}
646 
647 	return (error);
648 }
649 
650 /*
651  * Adjust for a truncated SCM_RIGHTS control message.
652  *  This means closing any file descriptors that aren't present
653  *  in the returned buffer.
654  *  m is the mbuf holding the (already externalized) SCM_RIGHTS message.
655  */
656 static void
657 free_rights(struct mbuf *m, struct lwp *l)
658 {
659 	int nfd;
660 	int i;
661 	int *fdv;
662 
663 	nfd = m->m_len < CMSG_SPACE(sizeof(int)) ? 0
664 	    : (m->m_len - CMSG_SPACE(sizeof(int))) / sizeof(int) + 1;
665 	fdv = (int *) CMSG_DATA(mtod(m,struct cmsghdr *));
666 	for (i = 0; i < nfd; i++)
667 		fdrelease(l, fdv[i]);
668 }
669 
670 void
671 free_control_mbuf(struct lwp *l, struct mbuf *control, struct mbuf *uncopied)
672 {
673 	struct mbuf *next;
674 	struct cmsghdr *cmsg;
675 	bool do_free_rights = false;
676 
677 	while (control != NULL) {
678 		cmsg = mtod(control, struct cmsghdr *);
679 		if (control == uncopied)
680 			do_free_rights = true;
681 		if (do_free_rights && cmsg->cmsg_level == SOL_SOCKET
682 		    && cmsg->cmsg_type == SCM_RIGHTS)
683 			free_rights(control, l);
684 		next = control->m_next;
685 		m_free(control);
686 		control = next;
687 	}
688 }
689 
690 /* Copy socket control/CMSG data to user buffer, frees the mbuf */
691 int
692 copyout_msg_control(struct lwp *l, struct msghdr *mp, struct mbuf *control)
693 {
694 	int i, len, error = 0;
695 	struct cmsghdr *cmsg;
696 	struct mbuf *m;
697 	char *q;
698 
699 	len = mp->msg_controllen;
700 	if (len <= 0 || control == 0) {
701 		mp->msg_controllen = 0;
702 		free_control_mbuf(l, control, control);
703 		return 0;
704 	}
705 
706 	q = (char *)mp->msg_control;
707 
708 	for (m = control; m != NULL; ) {
709 		cmsg = mtod(m, struct cmsghdr *);
710 		i = m->m_len;
711 		if (len < i) {
712 			mp->msg_flags |= MSG_CTRUNC;
713 			if (cmsg->cmsg_level == SOL_SOCKET
714 			    && cmsg->cmsg_type == SCM_RIGHTS)
715 				/* Do not truncate me ... */
716 				break;
717 			i = len;
718 		}
719 		error = copyout(mtod(m, void *), q, i);
720 		ktrkuser("msgcontrol", mtod(m, void *), i);
721 		if (error != 0) {
722 			/* We must free all the SCM_RIGHTS */
723 			m = control;
724 			break;
725 		}
726 		m = m->m_next;
727 		if (m)
728 			i = ALIGN(i);
729 		q += i;
730 		len -= i;
731 		if (len <= 0)
732 			break;
733 	}
734 
735 	free_control_mbuf(l, control, m);
736 
737 	mp->msg_controllen = q - (char *)mp->msg_control;
738 	return error;
739 }
740 
741 int
742 do_sys_recvmsg(struct lwp *l, int s, struct msghdr *mp, struct mbuf **from,
743     struct mbuf **control, register_t *retsize)
744 {
745 	struct file	*fp;
746 	struct uio	auio;
747 	struct iovec	aiov[UIO_SMALLIOV], *iov = aiov;
748 	struct iovec	*tiov;
749 	int		i, len, error, iovlen;
750 	struct socket	*so;
751 	struct iovec	*ktriov;
752 
753 	ktrkuser("msghdr", mp, sizeof *mp);
754 
755 	*from = NULL;
756 	if (control != NULL)
757 		*control = NULL;
758 
759 	/* getsock() will use the descriptor for us */
760 	if ((error = getsock(l->l_proc->p_fd, s, &fp)) != 0)
761 		return (error);
762 	so = (struct socket *)fp->f_data;
763 
764 	if (mp->msg_flags & MSG_IOVUSRSPACE) {
765 		if ((unsigned int)mp->msg_iovlen > UIO_SMALLIOV) {
766 			if ((unsigned int)mp->msg_iovlen > IOV_MAX) {
767 				error = EMSGSIZE;
768 				goto out;
769 			}
770 			iov = malloc(sizeof(struct iovec) * mp->msg_iovlen,
771 			    M_IOV, M_WAITOK);
772 		}
773 		if (mp->msg_iovlen != 0) {
774 			error = copyin(mp->msg_iov, iov,
775 			    (size_t)(mp->msg_iovlen * sizeof(struct iovec)));
776 			if (error)
777 				goto out;
778 		}
779 		auio.uio_iov = iov;
780 	} else
781 		auio.uio_iov = mp->msg_iov;
782 	auio.uio_iovcnt = mp->msg_iovlen;
783 	auio.uio_rw = UIO_READ;
784 	auio.uio_offset = 0;			/* XXX */
785 	auio.uio_resid = 0;
786 	KASSERT(l == curlwp);
787 	auio.uio_vmspace = l->l_proc->p_vmspace;
788 
789 	tiov = auio.uio_iov;
790 	for (i = 0; i < mp->msg_iovlen; i++, tiov++) {
791 #if 0
792 		/* cannot happen iov_len is unsigned */
793 		if (tiov->iov_len < 0) {
794 			error = EINVAL;
795 			goto out;
796 		}
797 #endif
798 		/*
799 		 * Reads return ssize_t because -1 is returned on error.
800 		 * Therefore we must restrict the length to SSIZE_MAX to
801 		 * avoid garbage return values.
802 		 */
803 		auio.uio_resid += tiov->iov_len;
804 		if (tiov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
805 			error = EINVAL;
806 			goto out;
807 		}
808 	}
809 
810 	ktriov = NULL;
811 	if (ktrpoint(KTR_GENIO)) {
812 		iovlen = auio.uio_iovcnt * sizeof(struct iovec);
813 		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
814 		memcpy(ktriov, auio.uio_iov, iovlen);
815 	}
816 
817 	len = auio.uio_resid;
818 	mp->msg_flags &= MSG_USERFLAGS;
819 	error = (*so->so_receive)(so, from, &auio, NULL, control,
820 			  &mp->msg_flags);
821 	len -= auio.uio_resid;
822 	*retsize = len;
823 	if (error != 0 && len != 0
824 	    && (error == ERESTART || error == EINTR || error == EWOULDBLOCK))
825 		/* Some data transferred */
826 		error = 0;
827 
828 	if (ktriov != NULL) {
829 		ktrgeniov(s, UIO_READ, ktriov, len, error);
830 		free(ktriov, M_TEMP);
831 	}
832 
833 	if (error != 0) {
834 		m_freem(*from);
835 		*from = NULL;
836 		if (control != NULL) {
837 			free_control_mbuf(l, *control, *control);
838 			*control = NULL;
839 		}
840 	}
841  out:
842 	if (iov != aiov)
843 		free(iov, M_TEMP);
844 	FILE_UNUSE(fp, l);
845 	return (error);
846 }
847 
848 
849 /* ARGSUSED */
850 int
851 sys_shutdown(struct lwp *l, void *v, register_t *retval)
852 {
853 	struct sys_shutdown_args /* {
854 		syscallarg(int)	s;
855 		syscallarg(int)	how;
856 	} */ *uap = v;
857 	struct proc	*p;
858 	struct file	*fp;
859 	int		error;
860 
861 	p = l->l_proc;
862 	/* getsock() will use the descriptor for us */
863 	if ((error = getsock(p->p_fd, SCARG(uap, s), &fp)) != 0)
864 		return (error);
865 	error = soshutdown((struct socket *)fp->f_data, SCARG(uap, how));
866 	FILE_UNUSE(fp, l);
867 	return (error);
868 }
869 
870 /* ARGSUSED */
871 int
872 sys_setsockopt(struct lwp *l, void *v, register_t *retval)
873 {
874 	struct sys_setsockopt_args /* {
875 		syscallarg(int)			s;
876 		syscallarg(int)			level;
877 		syscallarg(int)			name;
878 		syscallarg(const void *)	val;
879 		syscallarg(unsigned int)	valsize;
880 	} */ *uap = v;
881 	struct proc	*p;
882 	struct file	*fp;
883 	struct mbuf	*m;
884 	struct socket	*so;
885 	int		error;
886 	unsigned int	len;
887 
888 	p = l->l_proc;
889 	m = NULL;
890 	/* getsock() will use the descriptor for us */
891 	if ((error = getsock(p->p_fd, SCARG(uap, s), &fp)) != 0)
892 		return (error);
893 	so = (struct socket *)fp->f_data;
894 	len = SCARG(uap, valsize);
895 	if (len > MCLBYTES) {
896 		error = EINVAL;
897 		goto out;
898 	}
899 	if (SCARG(uap, val)) {
900 		m = getsombuf(so, MT_SOOPTS);
901 		if (len > MLEN)
902 			m_clget(m, M_WAIT);
903 		error = copyin(SCARG(uap, val), mtod(m, void *), len);
904 		if (error) {
905 			(void) m_free(m);
906 			goto out;
907 		}
908 		m->m_len = SCARG(uap, valsize);
909 	}
910 	error = sosetopt(so, SCARG(uap, level), SCARG(uap, name), m);
911  out:
912 	FILE_UNUSE(fp, l);
913 	return (error);
914 }
915 
916 /* ARGSUSED */
917 int
918 sys_getsockopt(struct lwp *l, void *v, register_t *retval)
919 {
920 	struct sys_getsockopt_args /* {
921 		syscallarg(int)			s;
922 		syscallarg(int)			level;
923 		syscallarg(int)			name;
924 		syscallarg(void *)		val;
925 		syscallarg(unsigned int *)	avalsize;
926 	} */ *uap = v;
927 	struct file	*fp;
928 	struct mbuf	*m;
929 	unsigned int	op, i, valsize;
930 	int		error;
931 
932 	m = NULL;
933 	/* getsock() will use the descriptor for us */
934 	if ((error = getsock(l->l_proc->p_fd, SCARG(uap, s), &fp)) != 0)
935 		return (error);
936 	if (SCARG(uap, val)) {
937 		error = copyin(SCARG(uap, avalsize),
938 			       &valsize, sizeof(valsize));
939 		if (error)
940 			goto out;
941 	} else
942 		valsize = 0;
943 	if ((error = sogetopt((struct socket *)fp->f_data, SCARG(uap, level),
944 	    SCARG(uap, name), &m)) == 0 && SCARG(uap, val) && valsize &&
945 	    m != NULL) {
946 		op = 0;
947 		while (m && !error && op < valsize) {
948 			i = min(m->m_len, (valsize - op));
949 			error = copyout(mtod(m, void *), SCARG(uap, val), i);
950 			op += i;
951 			SCARG(uap, val) = ((uint8_t *)SCARG(uap, val)) + i;
952 			m = m_free(m);
953 		}
954 		valsize = op;
955 		if (error == 0)
956 			error = copyout(&valsize,
957 					SCARG(uap, avalsize), sizeof(valsize));
958 	}
959 	if (m != NULL)
960 		(void) m_freem(m);
961  out:
962 	FILE_UNUSE(fp, l);
963 	return (error);
964 }
965 
#ifdef PIPE_SOCKETPAIR
/*
 * pipe(2) built from a pair of connected AF_LOCAL stream sockets.
 * retval[0] receives the read-only descriptor and retval[1] the
 * write-only one.  Both sockets are tagged SS_ISAPIPE.  Any failure
 * unwinds the descriptors and sockets created so far.
 */
/* ARGSUSED */
int
sys_pipe(struct lwp *l, void *v, register_t *retval)
{
	struct filedesc	*fdp = l->l_proc->p_fd;
	struct file	*rfile, *wfile;
	struct socket	*rsock, *wsock;
	int		newfd, rv;

	rv = socreate(AF_LOCAL, &rsock, SOCK_STREAM, 0, l);
	if (rv != 0)
		return (rv);
	rv = socreate(AF_LOCAL, &wsock, SOCK_STREAM, 0, l);
	if (rv != 0)
		goto close_read_sock;

	/* remember this socket pair implements a pipe */
	wsock->so_state |= SS_ISAPIPE;
	rsock->so_state |= SS_ISAPIPE;

	/* Read side.  falloc() marks the new file in use for us. */
	rv = falloc(l, &rfile, &newfd);
	if (rv != 0)
		goto close_write_sock;
	retval[0] = newfd;
	rfile->f_flag = FREAD;
	rfile->f_type = DTYPE_SOCKET;
	rfile->f_ops = &socketops;
	rfile->f_data = rsock;

	/* Write side. */
	rv = falloc(l, &wfile, &newfd);
	if (rv != 0)
		goto drop_read_file;
	wfile->f_flag = FWRITE;
	wfile->f_type = DTYPE_SOCKET;
	wfile->f_ops = &socketops;
	wfile->f_data = wsock;
	retval[1] = newfd;

	rv = unp_connect2(wsock, rsock, PRU_CONNECT2);
	if (rv != 0)
		goto drop_write_file;

	FILE_SET_MATURE(rfile);
	FILE_SET_MATURE(wfile);
	FILE_UNUSE(rfile, l);
	FILE_UNUSE(wfile, l);
	return (0);

 drop_write_file:
	FILE_UNUSE(wfile, l);
	ffree(wfile);
	fdremove(fdp, retval[1]);
 drop_read_file:
	FILE_UNUSE(rfile, l);
	ffree(rfile);
	fdremove(fdp, retval[0]);
 close_write_sock:
	(void)soclose(wsock);
 close_read_sock:
	(void)soclose(rsock);
	return (rv);
}
#endif /* PIPE_SOCKETPAIR */
1021 
1022 /*
1023  * Get socket name.
1024  */
1025 /* ARGSUSED */
1026 int
1027 do_sys_getsockname(struct lwp *l, int fd, int which, struct mbuf **nam)
1028 {
1029 	struct file	*fp;
1030 	struct socket	*so;
1031 	struct mbuf	*m;
1032 	int		error;
1033 
1034 	/* getsock() will use the descriptor for us */
1035 	if ((error = getsock(l->l_proc->p_fd, fd, &fp)) != 0)
1036 		return error;
1037 	so = (struct socket *)fp->f_data;
1038 
1039 	if (which == PRU_PEERADDR
1040 	    && (so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
1041 		error = ENOTCONN;
1042 		goto bad;
1043 	}
1044 
1045 	m = m_getclr(M_WAIT, MT_SONAME);
1046 	*nam = m;
1047 	MCLAIM(m, so->so_mowner);
1048 	error = (*so->so_proto->pr_usrreq)(so, which, (struct mbuf *)0,
1049 	    m, (struct mbuf *)0, (struct lwp *)0);
1050 	if (error != 0)
1051 		m_free(m);
1052     bad:
1053 	FILE_UNUSE(fp, l);
1054 	return error;
1055 }
1056 
1057 int
1058 copyout_sockname(struct sockaddr *asa, unsigned int *alen, int flags,
1059     struct mbuf *addr)
1060 {
1061 	int len;
1062 	int error;
1063 
1064 	if (asa == NULL)
1065 		/* Assume application not interested */
1066 		return 0;
1067 
1068 	if (flags & MSG_LENUSRSPACE) {
1069 		error = copyin(alen, &len, sizeof(len));
1070 		if (error)
1071 			return error;
1072 	} else
1073 		len = *alen;
1074 	if (len < 0)
1075 		return EINVAL;
1076 
1077 	if (addr == NULL) {
1078 		len = 0;
1079 		error = 0;
1080 	} else {
1081 		if (len > addr->m_len)
1082 			len = addr->m_len;
1083 		/* Maybe this ought to copy a chain ? */
1084 		ktrkuser("sockname", mtod(addr, void *), len);
1085 		error = copyout(mtod(addr, void *), asa, len);
1086 	}
1087 
1088 	if (error == 0) {
1089 		if (flags & MSG_LENUSRSPACE)
1090 			error = copyout(&len, alen, sizeof(len));
1091 		else
1092 			*alen = len;
1093 	}
1094 
1095 	return error;
1096 }
1097 
1098 /*
1099  * Get socket name.
1100  */
1101 /* ARGSUSED */
1102 int
1103 sys_getsockname(struct lwp *l, void *v, register_t *retval)
1104 {
1105 	struct sys_getsockname_args /* {
1106 		syscallarg(int)			fdes;
1107 		syscallarg(struct sockaddr *)	asa;
1108 		syscallarg(unsigned int *)	alen;
1109 	} */ *uap = v;
1110 	struct mbuf	*m;
1111 	int		error;
1112 
1113 	error = do_sys_getsockname(l, SCARG(uap, fdes), PRU_SOCKADDR, &m);
1114 	if (error != 0)
1115 		return error;
1116 
1117 	error = copyout_sockname(SCARG(uap, asa), SCARG(uap, alen),
1118 	    MSG_LENUSRSPACE, m);
1119 	if (m != NULL)
1120 		m_free(m);
1121 	return error;
1122 }
1123 
1124 /*
1125  * Get name of peer for connected socket.
1126  */
1127 /* ARGSUSED */
1128 int
1129 sys_getpeername(struct lwp *l, void *v, register_t *retval)
1130 {
1131 	struct sys_getpeername_args /* {
1132 		syscallarg(int)			fdes;
1133 		syscallarg(struct sockaddr *)	asa;
1134 		syscallarg(unsigned int *)	alen;
1135 	} */ *uap = v;
1136 	struct mbuf	*m;
1137 	int		error;
1138 
1139 	error = do_sys_getsockname(l, SCARG(uap, fdes), PRU_PEERADDR, &m);
1140 	if (error != 0)
1141 		return error;
1142 
1143 	error = copyout_sockname(SCARG(uap, asa), SCARG(uap, alen),
1144 	    MSG_LENUSRSPACE, m);
1145 	if (m != NULL)
1146 		m_free(m);
1147 	return error;
1148 }
1149 
1150 /*
1151  * XXX In a perfect world, we wouldn't pass around socket control
1152  * XXX arguments in mbufs, and this could go away.
1153  */
1154 int
1155 sockargs(struct mbuf **mp, const void *bf, size_t buflen, int type)
1156 {
1157 	struct sockaddr	*sa;
1158 	struct mbuf	*m;
1159 	int		error;
1160 
1161 	/*
1162 	 * We can't allow socket names > UCHAR_MAX in length, since that
1163 	 * will overflow sa_len.  Control data more than a page size in
1164 	 * length is just too much.
1165 	 */
1166 	if (buflen > (type == MT_SONAME ? UCHAR_MAX : PAGE_SIZE))
1167 		return (EINVAL);
1168 
1169 	/* Allocate an mbuf to hold the arguments. */
1170 	m = m_get(M_WAIT, type);
1171 	/* can't claim.  don't who to assign it to. */
1172 	if (buflen > MLEN) {
1173 		/*
1174 		 * Won't fit into a regular mbuf, so we allocate just
1175 		 * enough external storage to hold the argument.
1176 		 */
1177 		MEXTMALLOC(m, buflen, M_WAITOK);
1178 	}
1179 	m->m_len = buflen;
1180 	error = copyin(bf, mtod(m, void *), buflen);
1181 	if (error) {
1182 		(void) m_free(m);
1183 		return (error);
1184 	}
1185 	ktrkuser("sockargs", mtod(m, void *), buflen);
1186 	*mp = m;
1187 	if (type == MT_SONAME) {
1188 		sa = mtod(m, struct sockaddr *);
1189 #if BYTE_ORDER != BIG_ENDIAN
1190 		/*
1191 		 * 4.3BSD compat thing - need to stay, since bind(2),
1192 		 * connect(2), sendto(2) were not versioned for COMPAT_43.
1193 		 */
1194 		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1195 			sa->sa_family = sa->sa_len;
1196 #endif
1197 		sa->sa_len = buflen;
1198 	}
1199 	return (0);
1200 }
1201 
1202 int
1203 getsock(struct filedesc *fdp, int fdes, struct file **fpp)
1204 {
1205 	struct file	*fp;
1206 
1207 	if ((fp = fd_getfile(fdp, fdes)) == NULL)
1208 		return (EBADF);
1209 
1210 	FILE_USE(fp);
1211 
1212 	if (fp->f_type != DTYPE_SOCKET) {
1213 		FILE_UNUSE(fp, NULL);
1214 		return (ENOTSOCK);
1215 	}
1216 	*fpp = fp;
1217 	return (0);
1218 }
1219