xref: /openbsd-src/sys/kern/uipc_syscalls.c (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1 /*	$OpenBSD: uipc_syscalls.c,v 1.133 2016/08/09 02:25:35 guenther Exp $	*/
2 /*	$NetBSD: uipc_syscalls.c,v 1.19 1996/02/09 19:00:48 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1989, 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
33  */
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/filedesc.h>
38 #include <sys/proc.h>
39 #include <sys/file.h>
40 #include <sys/ioctl.h>
41 #include <sys/malloc.h>
42 #include <sys/event.h>
43 #include <sys/mbuf.h>
44 #include <sys/protosw.h>
45 #include <sys/socket.h>
46 #include <sys/socketvar.h>
47 #include <sys/signalvar.h>
48 #include <sys/pledge.h>
49 #include <sys/unpcb.h>
50 #include <sys/un.h>
51 #ifdef KTRACE
52 #include <sys/ktrace.h>
53 #endif
54 
55 #include <sys/mount.h>
56 #include <sys/syscallargs.h>
57 
58 #include <sys/domain.h>
59 #include <netinet/in.h>
60 #include <net/route.h>
61 
62 /*
63  * System call interface to the socket abstraction.
64  */
/* File operations vector installed as f_ops on the socket files created here. */
extern	struct fileops socketops;

/*
 * Copy the socket address stored in an mbuf out to a user buffer and
 * report its length; the definition is at the end of this file.
 */
int	copyaddrout(struct proc *, struct mbuf *, struct sockaddr *, socklen_t,
	    socklen_t *);
69 
70 int
71 sys_socket(struct proc *p, void *v, register_t *retval)
72 {
73 	struct sys_socket_args /* {
74 		syscallarg(int) domain;
75 		syscallarg(int) type;
76 		syscallarg(int) protocol;
77 	} */ *uap = v;
78 	struct filedesc *fdp = p->p_fd;
79 	struct socket *so;
80 	struct file *fp;
81 	int type = SCARG(uap, type);
82 	int domain = SCARG(uap, domain);
83 	int fd, error, ss = 0;
84 
85 	if ((type & SOCK_DNS) && !(domain == AF_INET || domain == AF_INET6))
86 		return (EINVAL);
87 
88 	if (ISSET(type, SOCK_DNS))
89 		ss |= SS_DNS;
90 	error = pledge_socket(p, domain, ss);
91 	if (error)
92 		return (error);
93 
94 	fdplock(fdp);
95 	error = falloc(p, &fp, &fd);
96 	if (error == 0 && (type & SOCK_CLOEXEC))
97 		fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
98 	fdpunlock(fdp);
99 	if (error != 0)
100 		goto out;
101 
102 	fp->f_flag = FREAD | FWRITE | (type & SOCK_NONBLOCK ? FNONBLOCK : 0);
103 	fp->f_type = DTYPE_SOCKET;
104 	fp->f_ops = &socketops;
105 	error = socreate(SCARG(uap, domain), &so,
106 	    type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK | SOCK_DNS), SCARG(uap, protocol));
107 	if (error) {
108 		fdplock(fdp);
109 		fdremove(fdp, fd);
110 		closef(fp, p);
111 		fdpunlock(fdp);
112 	} else {
113 		fp->f_data = so;
114 		if (type & SOCK_NONBLOCK)
115 			(*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&type, p);
116 		so->so_state |= ss;
117 		FILE_SET_MATURE(fp, p);
118 		*retval = fd;
119 	}
120 out:
121 	return (error);
122 }
123 
124 static inline int
125 isdnssocket(struct socket *so)
126 {
127 	return (so->so_state & SS_DNS);
128 }
129 
130 /* For SS_DNS sockets, only allow port DNS (port 53) */
131 static int
132 dns_portcheck(struct proc *p, struct socket *so, void *nam, size_t namelen)
133 {
134 	switch (so->so_proto->pr_domain->dom_family) {
135 	case AF_INET:
136 		if (namelen < sizeof(struct sockaddr_in))
137 			break;
138 		if (((struct sockaddr_in *)nam)->sin_port == htons(53))
139 			return (0);
140 		break;
141 	case AF_INET6:
142 		if (namelen < sizeof(struct sockaddr_in6))
143 			break;
144 		if (((struct sockaddr_in6 *)nam)->sin6_port == htons(53))
145 			return (0);
146 	}
147 	if (p->p_p->ps_flags & PS_PLEDGE)
148 		return (pledge_fail(p, EPERM, PLEDGE_DNS));
149 	return (EINVAL);
150 }
151 
152 int
153 sys_bind(struct proc *p, void *v, register_t *retval)
154 {
155 	struct sys_bind_args /* {
156 		syscallarg(int) s;
157 		syscallarg(const struct sockaddr *) name;
158 		syscallarg(socklen_t) namelen;
159 	} */ *uap = v;
160 	struct file *fp;
161 	struct mbuf *nam;
162 	struct socket *so;
163 	int error;
164 
165 	if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
166 		return (error);
167 	so = fp->f_data;
168 	error = pledge_socket(p, so->so_proto->pr_domain->dom_family,
169 	    so->so_state);
170 	if (error)
171 		goto out;
172 	error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
173 	    MT_SONAME);
174 	if (error)
175 		goto out;
176 #ifdef KTRACE
177 	if (KTRPOINT(p, KTR_STRUCT))
178 		ktrsockaddr(p, mtod(nam, caddr_t), SCARG(uap, namelen));
179 #endif
180 	error = sobind(so, nam, p);
181 	m_freem(nam);
182 out:
183 	FRELE(fp, p);
184 	return (error);
185 }
186 
187 int
188 sys_listen(struct proc *p, void *v, register_t *retval)
189 {
190 	struct sys_listen_args /* {
191 		syscallarg(int) s;
192 		syscallarg(int) backlog;
193 	} */ *uap = v;
194 	struct file *fp;
195 	struct socket *so;
196 	int error;
197 
198 	if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
199 		return (error);
200 	so = fp->f_data;
201 	error = solisten(so, SCARG(uap, backlog));
202 	FRELE(fp, p);
203 	return (error);
204 }
205 
206 int
207 sys_accept(struct proc *p, void *v, register_t *retval)
208 {
209 	struct sys_accept_args /* {
210 		syscallarg(int) s;
211 		syscallarg(struct sockaddr *) name;
212 		syscallarg(socklen_t *) anamelen;
213 	} */ *uap = v;
214 
215 	return (doaccept(p, SCARG(uap, s), SCARG(uap, name),
216 	    SCARG(uap, anamelen), SOCK_NONBLOCK_INHERIT, retval));
217 }
218 
219 int
220 sys_accept4(struct proc *p, void *v, register_t *retval)
221 {
222 	struct sys_accept4_args /* {
223 		syscallarg(int) s;
224 		syscallarg(struct sockaddr *) name;
225 		syscallarg(socklen_t *) anamelen;
226 		syscallarg(socklen_t *) int flags;
227 	} */ *uap = v;
228 
229 	if (SCARG(uap, flags) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
230 		return (EINVAL);
231 
232 	return (doaccept(p, SCARG(uap, s), SCARG(uap, name),
233 	    SCARG(uap, anamelen), SCARG(uap, flags), retval));
234 }
235 
/*
 * Common implementation behind sys_accept() and sys_accept4().
 *
 * Takes the first connection queued on the socket behind descriptor
 * `sock', binds it to a newly allocated descriptor and returns that
 * descriptor in *retval.
 *
 *	name, anamelen	optional user buffer and in/out length for the
 *			peer address; *anamelen is read only when name
 *			is non-NULL.
 *	flags		SOCK_CLOEXEC and SOCK_NONBLOCK apply to the new
 *			descriptor; SOCK_NONBLOCK_INHERIT makes the new
 *			file take FNONBLOCK from the accepting file.
 *
 * Returns 0 or an errno value.  SS_DNS sockets and sockets without
 * SO_ACCEPTCONN are rejected with EINVAL.
 */
int
doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen,
    int flags, register_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	struct file *fp, *headfp;
	struct mbuf *nam;
	socklen_t namelen;
	int error, s, tmpfd;
	struct socket *head, *so;
	int nflag;

	/* The caller's buffer size is only needed when an address is wanted. */
	if (name && (error = copyin(anamelen, &namelen, sizeof (namelen))))
		return (error);
	if ((error = getsock(p, sock, &fp)) != 0)
		return (error);

	/* The level saved in s is restored by splx() at the "bad" label. */
	s = splsoftnet();
	/* fp is reused for the new file below; keep the accepting file here. */
	headfp = fp;
	head = fp->f_data;

	if (isdnssocket((struct socket *)fp->f_data)) {
		error = EINVAL;
		goto bad;
	}
redo:
	if ((head->so_options & SO_ACCEPTCONN) == 0) {
		error = EINVAL;
		goto bad;
	}
	/* Non-blocking socket with nothing queued: fail instead of sleeping. */
	if ((head->so_state & SS_NBIO) && head->so_qlen == 0) {
		if (head->so_state & SS_CANTRCVMORE)
			error = ECONNABORTED;
		else
			error = EWOULDBLOCK;
		goto bad;
	}
	/* Sleep until a connection is queued or an error is posted. */
	while (head->so_qlen == 0 && head->so_error == 0) {
		if (head->so_state & SS_CANTRCVMORE) {
			head->so_error = ECONNABORTED;
			break;
		}
		error = tsleep(&head->so_timeo, PSOCK | PCATCH, "netcon", 0);
		if (error) {
			goto bad;
		}
	}
	/* A pending socket error is reported once and then cleared. */
	if (head->so_error) {
		error = head->so_error;
		head->so_error = 0;
		goto bad;
	}

	/* Figure out whether the new socket should be non-blocking. */
	nflag = flags & SOCK_NONBLOCK_INHERIT ? (headfp->f_flag & FNONBLOCK)
	    : (flags & SOCK_NONBLOCK ? FNONBLOCK : 0);

	fdplock(fdp);
	error = falloc(p, &fp, &tmpfd);
	if (error == 0 && (flags & SOCK_CLOEXEC))
		fdp->fd_ofileflags[tmpfd] |= UF_EXCLOSE;
	fdpunlock(fdp);
	if (error != 0) {
		/*
		 * Probably ran out of file descriptors.  Wakeup
		 * so some other process might have a chance at it.
		 */
		wakeup_one(&head->so_timeo);
		goto bad;
	}

	nam = m_get(M_WAIT, MT_SONAME);

	/*
	 * Check whether the queue emptied while we slept: falloc() or
	 * m_get() may have blocked, allowing the connection to be reset
	 * or another thread or process to accept it.  If so, start over.
	 */
	if (head->so_qlen == 0) {
		m_freem(nam);
		fdplock(fdp);
		fdremove(fdp, tmpfd);
		closef(fp, p);
		fdpunlock(fdp);
		goto redo;
	}

	/*
	 * Do not sleep after we have taken the socket out of the queue.
	 */
	so = TAILQ_FIRST(&head->so_q);
	if (soqremque(so, 1) == 0)
		panic("accept");

	/* connection has been removed from the listen queue */
	KNOTE(&head->so_rcv.sb_sel.si_note, 0);

	/* Turn the freshly allocated file into a socket file for so. */
	fp->f_type = DTYPE_SOCKET;
	fp->f_flag = FREAD | FWRITE | nflag;
	fp->f_ops = &socketops;
	fp->f_data = so;
	error = soaccept(so, nam);
	if (!error && name != NULL)
		error = copyaddrout(p, nam, name, namelen, anamelen);

	if (error) {
		/* if an error occurred, free the file descriptor */
		fdplock(fdp);
		fdremove(fdp, tmpfd);
		closef(fp, p);
		fdpunlock(fdp);
	} else {
		/* Apply the computed blocking mode via FIONBIO as well. */
		(*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&nflag, p);
		FILE_SET_MATURE(fp, p);
		*retval = tmpfd;
	}
	m_freem(nam);
bad:
	splx(s);
	/* Drop the reference getsock() took on the accepting file. */
	FRELE(headfp, p);
	return (error);
}
358 
/*
 * connect(2) entry point.
 *
 * Copies the destination address in, applies the pledge check and, for
 * SS_DNS sockets, the port 53 restriction, then starts the connection
 * with soconnect().  A non-blocking socket whose connection is still in
 * progress returns EINPROGRESS; otherwise the caller sleeps until the
 * connection attempt finishes or the sleep is interrupted.
 *
 * Returns 0 or an errno value.  EALREADY is returned when a connection
 * attempt is already in progress, and ERESTART is converted to EINTR.
 */
int
sys_connect(struct proc *p, void *v, register_t *retval)
{
	struct sys_connect_args /* {
		syscallarg(int) s;
		syscallarg(const struct sockaddr *) name;
		syscallarg(socklen_t) namelen;
	} */ *uap = v;
	struct file *fp;
	struct socket *so;
	struct mbuf *nam = NULL;
	int error, s, interrupted = 0;

	if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
		return (error);
	so = fp->f_data;
	/* A previous connect on this socket has not finished yet. */
	if (so->so_state & SS_ISCONNECTING) {
		FRELE(fp, p);
		return (EALREADY);
	}
	error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
	    MT_SONAME);
	if (error)
		goto bad;
	error = pledge_socket(p, so->so_proto->pr_domain->dom_family,
	    so->so_state);
	if (error)
		goto bad;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_STRUCT))
		ktrsockaddr(p, mtod(nam, caddr_t), SCARG(uap, namelen));
#endif

	/* SS_DNS sockets may only connect to port 53. */
	if (isdnssocket(so)) {
		error = dns_portcheck(p, so, mtod(nam, void *), nam->m_len);
		if (error) {
			FRELE(fp, p);
			m_freem(nam);
			return (error);
		}
	}

	error = soconnect(so, nam);
	if (error)
		goto bad;
	/* Non-blocking: report that the connection is still being set up. */
	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
		FRELE(fp, p);
		m_freem(nam);
		return (EINPROGRESS);
	}
	s = splsoftnet();
	/* Wait for the attempt to complete or for a socket error. */
	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
		error = tsleep(&so->so_timeo, PSOCK | PCATCH, "netcon2", 0);
		if (error) {
			if (error == EINTR || error == ERESTART)
				interrupted = 1;
			break;
		}
	}
	/* The sleep was not cut short: collect and clear the socket error. */
	if (error == 0) {
		error = so->so_error;
		so->so_error = 0;
	}
	splx(s);
bad:
	/*
	 * After an interrupted sleep SS_ISCONNECTING is left set, so a
	 * later connect() on this socket takes the EALREADY path above.
	 */
	if (!interrupted)
		so->so_state &= ~SS_ISCONNECTING;
	FRELE(fp, p);
	if (nam)
		m_freem(nam);
	/* The call is never restarted; report the interruption instead. */
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
433 
/*
 * socketpair(2) entry point.
 *
 * Creates two sockets of the requested domain/type/protocol, gives each
 * a file and a descriptor, connects them to each other with soconnect2()
 * and copies the two descriptors out to the user array `rsv'.
 * SOCK_CLOEXEC and SOCK_NONBLOCK in `type' are applied to both
 * descriptors and masked off before socreate() is called.
 *
 * Returns 0 or an errno value.  On failure everything created so far is
 * torn down through the free4..free1 labels, which undo the steps in
 * reverse order.
 */
int
sys_socketpair(struct proc *p, void *v, register_t *retval)
{
	struct sys_socketpair_args /* {
		syscallarg(int) domain;
		syscallarg(int) type;
		syscallarg(int) protocol;
		syscallarg(int *) rsv;
	} */ *uap = v;
	struct filedesc *fdp = p->p_fd;
	struct file *fp1, *fp2;
	struct socket *so1, *so2;
	int type, flags, fflag, error, sv[2];

	/* Split the user's type word into the socket type and the flag bits. */
	type  = SCARG(uap, type) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK);
	flags = SCARG(uap, type) &  (SOCK_CLOEXEC | SOCK_NONBLOCK);
	fflag = FREAD | FWRITE | (flags & SOCK_NONBLOCK ? FNONBLOCK : 0);

	error = socreate(SCARG(uap, domain), &so1, type, SCARG(uap, protocol));
	if (error)
		return (error);
	error = socreate(SCARG(uap, domain), &so2, type, SCARG(uap, protocol));
	if (error)
		goto free1;

	/* The descriptor table stays locked until both files are mature. */
	fdplock(fdp);
	if ((error = falloc(p, &fp1, &sv[0])) != 0)
		goto free2;
	fp1->f_flag = fflag;
	fp1->f_type = DTYPE_SOCKET;
	fp1->f_ops = &socketops;
	fp1->f_data = so1;
	if ((error = falloc(p, &fp2, &sv[1])) != 0)
		goto free3;
	fp2->f_flag = fflag;
	fp2->f_type = DTYPE_SOCKET;
	fp2->f_ops = &socketops;
	fp2->f_data = so2;
	if (flags & SOCK_CLOEXEC) {
		fdp->fd_ofileflags[sv[0]] |= UF_EXCLOSE;
		fdp->fd_ofileflags[sv[1]] |= UF_EXCLOSE;
	}
	if ((error = soconnect2(so1, so2)) != 0)
		goto free4;
	if ((SCARG(uap, type) & SOCK_TYPE_MASK) == SOCK_DGRAM) {
		/*
		 * Datagram socket connection is asymmetric.
		 */
		 if ((error = soconnect2(so2, so1)) != 0)
			goto free4;
	}
	error = copyout(sv, SCARG(uap, rsv), 2 * sizeof (int));
	if (error == 0) {
		if (flags & SOCK_NONBLOCK) {
			(*fp1->f_ops->fo_ioctl)(fp1, FIONBIO, (caddr_t)&flags,
			    p);
			(*fp2->f_ops->fo_ioctl)(fp2, FIONBIO, (caddr_t)&flags,
			    p);
		}
		FILE_SET_MATURE(fp1, p);
		FILE_SET_MATURE(fp2, p);
		fdpunlock(fdp);
		return (0);
	}
	/*
	 * Error unwinding.  Once a socket has been attached to a file it is
	 * released via closef() and its pointer is cleared so the soclose()
	 * calls further down skip it.
	 * NOTE(review): presumably closef() closes the socket through
	 * f_data -- confirm.
	 */
free4:
	fdremove(fdp, sv[1]);
	closef(fp2, p);
	so2 = NULL;
free3:
	fdremove(fdp, sv[0]);
	closef(fp1, p);
	so1 = NULL;
free2:
	if (so2 != NULL)
		(void)soclose(so2);
	fdpunlock(fdp);
free1:
	if (so1 != NULL)
		(void)soclose(so1);
	return (error);
}
515 
516 int
517 sys_sendto(struct proc *p, void *v, register_t *retval)
518 {
519 	struct sys_sendto_args /* {
520 		syscallarg(int) s;
521 		syscallarg(const void *) buf;
522 		syscallarg(size_t) len;
523 		syscallarg(int) flags;
524 		syscallarg(const struct sockaddr *) to;
525 		syscallarg(socklen_t) tolen;
526 	} */ *uap = v;
527 	struct msghdr msg;
528 	struct iovec aiov;
529 
530 	msg.msg_name = (caddr_t)SCARG(uap, to);
531 	msg.msg_namelen = SCARG(uap, tolen);
532 	msg.msg_iov = &aiov;
533 	msg.msg_iovlen = 1;
534 	msg.msg_control = 0;
535 	msg.msg_flags = 0;
536 	aiov.iov_base = (char *)SCARG(uap, buf);
537 	aiov.iov_len = SCARG(uap, len);
538 	return (sendit(p, SCARG(uap, s), &msg, SCARG(uap, flags), retval));
539 }
540 
541 int
542 sys_sendmsg(struct proc *p, void *v, register_t *retval)
543 {
544 	struct sys_sendmsg_args /* {
545 		syscallarg(int) s;
546 		syscallarg(const struct msghdr *) msg;
547 		syscallarg(int) flags;
548 	} */ *uap = v;
549 	struct msghdr msg;
550 	struct iovec aiov[UIO_SMALLIOV], *iov;
551 	int error;
552 
553 	error = copyin(SCARG(uap, msg), &msg, sizeof (msg));
554 	if (error)
555 		return (error);
556 #ifdef KTRACE
557 	if (KTRPOINT(p, KTR_STRUCT))
558 		ktrmsghdr(p, &msg);
559 #endif
560 
561 	if (msg.msg_iovlen > IOV_MAX)
562 		return (EMSGSIZE);
563 	if (msg.msg_iovlen > UIO_SMALLIOV)
564 		iov = mallocarray(msg.msg_iovlen, sizeof(struct iovec),
565 		    M_IOV, M_WAITOK);
566 	else
567 		iov = aiov;
568 	if (msg.msg_iovlen &&
569 	    (error = copyin(msg.msg_iov, iov,
570 		    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
571 		goto done;
572 #ifdef KTRACE
573 	if (msg.msg_iovlen && KTRPOINT(p, KTR_STRUCT))
574 		ktriovec(p, iov, msg.msg_iovlen);
575 #endif
576 	msg.msg_iov = iov;
577 	msg.msg_flags = 0;
578 	error = sendit(p, SCARG(uap, s), &msg, SCARG(uap, flags), retval);
579 done:
580 	if (iov != aiov)
581 		free(iov, M_IOV, sizeof(struct iovec) * msg.msg_iovlen);
582 	return (error);
583 }
584 
/*
 * Common send path used by sys_sendto() and sys_sendmsg().
 *
 *	s	socket descriptor
 *	mp	message header; msg_iov is read directly here, while the
 *		buffers it describes (UIO_USERSPACE) and msg_name /
 *		msg_control are fetched with copyin via sockargs().
 *	flags	passed to sosend(); MSG_NOSIGNAL suppresses SIGPIPE.
 *	retsize	receives the number of bytes sent on success.
 *
 * Returns 0 or an errno value.
 */
int
sendit(struct proc *p, int s, struct msghdr *mp, int flags, register_t *retsize)
{
	struct file *fp;
	struct uio auio;
	struct iovec *iov;
	int i;
	struct mbuf *to, *control;
	struct socket *so;
	size_t len;
	int error;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
	int iovlen = 0;
#endif

	/* Must be NULL before the first "goto bad", which frees it if set. */
	to = NULL;

	if ((error = getsock(p, s, &fp)) != 0)
		return (error);
	so = fp->f_data;

	error = pledge_sendit(p, mp->msg_name);
	if (error)
		goto bad;

	/* Describe the transfer: data is read from user space for a write. */
	auio.uio_iov = mp->msg_iov;
	auio.uio_iovcnt = mp->msg_iovlen;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_rw = UIO_WRITE;
	auio.uio_procp = p;
	auio.uio_offset = 0;			/* XXX */
	auio.uio_resid = 0;
	iov = mp->msg_iov;
	/* Total up the segment lengths into uio_resid. */
	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
		/* Don't allow sum > SSIZE_MAX */
		if (iov->iov_len > SSIZE_MAX ||
		    (auio.uio_resid += iov->iov_len) > SSIZE_MAX) {
			error = EINVAL;
			goto bad;
		}
	}
	if (mp->msg_name) {
		error = sockargs(&to, mp->msg_name, mp->msg_namelen,
		    MT_SONAME);
		if (error)
			goto bad;
		/* SS_DNS sockets may only send to port 53. */
		if (isdnssocket(so)) {
			error = dns_portcheck(p, so, mtod(to, caddr_t),
			    mp->msg_namelen);
			if (error)
				goto bad;
		}
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
		 	ktrsockaddr(p, mtod(to, caddr_t), mp->msg_namelen);
#endif
	}
	if (mp->msg_control) {
		/* Control data must hold at least one aligned cmsghdr. */
		if (mp->msg_controllen < CMSG_ALIGN(sizeof(struct cmsghdr))) {
			error = EINVAL;
			goto bad;
		}
		error = sockargs(&control, mp->msg_control,
		    mp->msg_controllen, MT_CONTROL);
		if (error)
			goto bad;
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT) && mp->msg_controllen)
			ktrcmsghdr(p, mtod(control, char *),
			    mp->msg_controllen);
#endif
	} else
		control = 0;
#ifdef KTRACE
	/* Snapshot the iovec array now; it is traced after the send. */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = mallocarray(auio.uio_iovcnt, sizeof(struct iovec),
		    M_TEMP, M_WAITOK);
		iovlen = auio.uio_iovcnt * sizeof (struct iovec);

		memcpy(ktriov, auio.uio_iov, iovlen);
	}
#endif
	/* Remember the request size; bytes sent = len - remaining resid. */
	len = auio.uio_resid;
	error = sosend(fp->f_data, to, &auio, NULL, control, flags);
	if (error) {
		/*
		 * An interrupted or would-block send that already moved
		 * some data is reported as a successful short write.
		 */
		if (auio.uio_resid != len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE && (flags & MSG_NOSIGNAL) == 0)
			ptsignal(p, SIGPIPE, STHREAD);
	}
	if (error == 0) {
		*retsize = len - auio.uio_resid;
		/* Per-file transfer statistics. */
		fp->f_wxfer++;
		fp->f_wbytes += *retsize;
	}
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, s, UIO_WRITE, ktriov, *retsize);
		free(ktriov, M_TEMP, iovlen);
	}
#endif
bad:
	/*
	 * Only the address mbuf is released here.
	 * NOTE(review): control is never freed in this function;
	 * presumably sosend() consumes it -- confirm.
	 */
	FRELE(fp, p);
	if (to)
		m_freem(to);
	return (error);
}
695 
696 int
697 sys_recvfrom(struct proc *p, void *v, register_t *retval)
698 {
699 	struct sys_recvfrom_args /* {
700 		syscallarg(int) s;
701 		syscallarg(void *) buf;
702 		syscallarg(size_t) len;
703 		syscallarg(int) flags;
704 		syscallarg(struct sockaddr *) from;
705 		syscallarg(socklen_t *) fromlenaddr;
706 	} */ *uap = v;
707 	struct msghdr msg;
708 	struct iovec aiov;
709 	int error;
710 
711 	if (SCARG(uap, fromlenaddr)) {
712 		error = copyin(SCARG(uap, fromlenaddr),
713 		    &msg.msg_namelen, sizeof (msg.msg_namelen));
714 		if (error)
715 			return (error);
716 	} else
717 		msg.msg_namelen = 0;
718 	msg.msg_name = (caddr_t)SCARG(uap, from);
719 	msg.msg_iov = &aiov;
720 	msg.msg_iovlen = 1;
721 	aiov.iov_base = SCARG(uap, buf);
722 	aiov.iov_len = SCARG(uap, len);
723 	msg.msg_control = 0;
724 	msg.msg_flags = SCARG(uap, flags);
725 	return (recvit(p, SCARG(uap, s), &msg,
726 	    (caddr_t)SCARG(uap, fromlenaddr), retval));
727 }
728 
729 int
730 sys_recvmsg(struct proc *p, void *v, register_t *retval)
731 {
732 	struct sys_recvmsg_args /* {
733 		syscallarg(int) s;
734 		syscallarg(struct msghdr *) msg;
735 		syscallarg(int) flags;
736 	} */ *uap = v;
737 	struct msghdr msg;
738 	struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
739 	int error;
740 
741 	error = copyin(SCARG(uap, msg), &msg, sizeof (msg));
742 	if (error)
743 		return (error);
744 
745 	if (msg.msg_iovlen > IOV_MAX)
746 		return (EMSGSIZE);
747 	if (msg.msg_iovlen > UIO_SMALLIOV)
748 		iov = mallocarray(msg.msg_iovlen, sizeof(struct iovec),
749 		    M_IOV, M_WAITOK);
750 	else
751 		iov = aiov;
752 	msg.msg_flags = SCARG(uap, flags);
753 	if (msg.msg_iovlen > 0) {
754 		error = copyin(msg.msg_iov, iov,
755 		    msg.msg_iovlen * sizeof(struct iovec));
756 		if (error)
757 			goto done;
758 	}
759 	uiov = msg.msg_iov;
760 	msg.msg_iov = iov;
761 	if ((error = recvit(p, SCARG(uap, s), &msg, NULL, retval)) == 0) {
762 		msg.msg_iov = uiov;
763 #ifdef KTRACE
764 		if (KTRPOINT(p, KTR_STRUCT)) {
765 			ktrmsghdr(p, &msg);
766 			if (msg.msg_iovlen)
767 				ktriovec(p, iov, msg.msg_iovlen);
768 		}
769 #endif
770 		error = copyout(&msg, SCARG(uap, msg), sizeof(msg));
771 	}
772 done:
773 	if (iov != aiov)
774 		free(iov, M_IOV, sizeof(struct iovec) * msg.msg_iovlen);
775 	return (error);
776 }
777 
/*
 * Common receive path used by sys_recvfrom() and sys_recvmsg().
 *
 *	s		socket descriptor
 *	mp		message header; msg_iov is read directly here, the
 *			data buffers are user space (UIO_USERSPACE) and
 *			msg_name / msg_control are user buffers written
 *			with copyout.  msg_namelen, msg_controllen and
 *			msg_flags are updated in place.
 *	namelenp	if non-NULL, user address that receives the
 *			source address length.
 *	retsize		receives the number of bytes received.
 *
 * Returns 0 or an errno value.
 */
int
recvit(struct proc *p, int s, struct msghdr *mp, caddr_t namelenp,
    register_t *retsize)
{
	struct file *fp;
	struct uio auio;
	struct iovec *iov;
	int i;
	size_t len;
	int error;
	struct mbuf *from = NULL, *control = NULL;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
	int iovlen = 0;
#endif

	if ((error = getsock(p, s, &fp)) != 0)
		return (error);

	/* Describe the transfer: data is written to user space for a read. */
	auio.uio_iov = mp->msg_iov;
	auio.uio_iovcnt = mp->msg_iovlen;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_rw = UIO_READ;
	auio.uio_procp = p;
	auio.uio_offset = 0;			/* XXX */
	auio.uio_resid = 0;
	iov = mp->msg_iov;
	/* Total up the segment lengths into uio_resid. */
	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
		/* Don't allow sum > SSIZE_MAX */
		if (iov->iov_len > SSIZE_MAX ||
		    (auio.uio_resid += iov->iov_len) > SSIZE_MAX) {
			error = EINVAL;
			goto out;
		}
	}
#ifdef KTRACE
	/* Snapshot the iovec array now; it is traced after the receive. */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = mallocarray(auio.uio_iovcnt, sizeof(struct iovec),
		    M_TEMP, M_WAITOK);
		iovlen = auio.uio_iovcnt * sizeof (struct iovec);

		memcpy(ktriov, auio.uio_iov, iovlen);
	}
#endif
	/* Remember the request size; bytes read = len - remaining resid. */
	len = auio.uio_resid;
	/* Control data is requested only if the caller gave a buffer. */
	error = soreceive(fp->f_data, &from, &auio, NULL,
			  mp->msg_control ? &control : NULL,
			  &mp->msg_flags,
			  mp->msg_control ? mp->msg_controllen : 0);
	if (error) {
		/*
		 * An interrupted or would-block receive that already moved
		 * some data is reported as a successful short read.
		 */
		if (auio.uio_resid != len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, s, UIO_READ, ktriov, len - auio.uio_resid);
		free(ktriov, M_TEMP, iovlen);
	}
#endif
	if (error)
		goto out;
	*retsize = len - auio.uio_resid;
	if (mp->msg_name) {
		socklen_t alen;

		if (from == NULL)
			alen = 0;
		else {
			/*
			 * Copy out no more than the caller's buffer holds,
			 * but report the full address length in alen.
			 */
			alen = from->m_len;
			error = copyout(mtod(from, caddr_t), mp->msg_name,
			    MIN(alen, mp->msg_namelen));
			if (error)
				goto out;
#ifdef KTRACE
			if (KTRPOINT(p, KTR_STRUCT))
				ktrsockaddr(p, mtod(from, caddr_t), alen);
#endif
		}
		mp->msg_namelen = alen;
		if (namelenp &&
		    (error = copyout(&alen, namelenp, sizeof(alen)))) {
			goto out;
		}
	}
	if (mp->msg_control) {
		/* From here on len counts the space left in the user buffer. */
		len = mp->msg_controllen;
		if (len <= 0 || control == NULL)
			len = 0;
		else {
			struct mbuf *m = control;
			caddr_t cp = mp->msg_control;

			/* Copy out the control mbuf chain one mbuf at a time. */
			do {
				i = m->m_len;
				/* Not enough room: truncate and flag it. */
				if (len < i) {
					mp->msg_flags |= MSG_CTRUNC;
					i = len;
				}
				error = copyout(mtod(m, caddr_t), cp, i);
				/* Pad to alignment before the next mbuf's data. */
				if (m->m_next)
					i = ALIGN(i);
				cp += i;
				len -= i;
				if (error != 0 || len <= 0)
					break;
#ifdef KTRACE
				if (KTRPOINT(p, KTR_STRUCT) && i)
					ktrcmsghdr(p, mtod(m, char *), i);
#endif
			} while ((m = m->m_next) != NULL);
			/* Bytes of the user control buffer actually covered. */
			len = cp - (caddr_t)mp->msg_control;
		}
		mp->msg_controllen = len;
	}
	if (!error) {
		/* Per-file transfer statistics. */
		fp->f_rxfer++;
		fp->f_rbytes += *retsize;
	}
out:
	FRELE(fp, p);
	if (from)
		m_freem(from);
	if (control)
		m_freem(control);
	return (error);
}
906 
907 int
908 sys_shutdown(struct proc *p, void *v, register_t *retval)
909 {
910 	struct sys_shutdown_args /* {
911 		syscallarg(int) s;
912 		syscallarg(int) how;
913 	} */ *uap = v;
914 	struct file *fp;
915 	int error;
916 
917 	if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
918 		return (error);
919 	error = soshutdown(fp->f_data, SCARG(uap, how));
920 	FRELE(fp, p);
921 	return (error);
922 }
923 
924 int
925 sys_setsockopt(struct proc *p, void *v, register_t *retval)
926 {
927 	struct sys_setsockopt_args /* {
928 		syscallarg(int) s;
929 		syscallarg(int) level;
930 		syscallarg(int) name;
931 		syscallarg(const void *) val;
932 		syscallarg(socklen_t) valsize;
933 	} */ *uap = v;
934 	struct file *fp;
935 	struct mbuf *m = NULL;
936 	int error;
937 
938 
939 	if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
940 		return (error);
941 	error = pledge_sockopt(p, 1, SCARG(uap, level), SCARG(uap, name));
942 	if (error)
943 		goto bad;
944 	if (SCARG(uap, valsize) > MCLBYTES) {
945 		error = EINVAL;
946 		goto bad;
947 	}
948 	if (SCARG(uap, val)) {
949 		m = m_get(M_WAIT, MT_SOOPTS);
950 		if (SCARG(uap, valsize) > MLEN) {
951 			MCLGET(m, M_DONTWAIT);
952 			if ((m->m_flags & M_EXT) == 0) {
953 				error = ENOBUFS;
954 				goto bad;
955 			}
956 		}
957 		if (m == NULL) {
958 			error = ENOBUFS;
959 			goto bad;
960 		}
961 		error = copyin(SCARG(uap, val), mtod(m, caddr_t),
962 		    SCARG(uap, valsize));
963 		if (error) {
964 			goto bad;
965 		}
966 		m->m_len = SCARG(uap, valsize);
967 	}
968 	error = sosetopt(fp->f_data, SCARG(uap, level), SCARG(uap, name), m);
969 	m = NULL;
970 bad:
971 	if (m)
972 		m_freem(m);
973 	FRELE(fp, p);
974 	return (error);
975 }
976 
977 int
978 sys_getsockopt(struct proc *p, void *v, register_t *retval)
979 {
980 	struct sys_getsockopt_args /* {
981 		syscallarg(int) s;
982 		syscallarg(int) level;
983 		syscallarg(int) name;
984 		syscallarg(void *) val;
985 		syscallarg(socklen_t *) avalsize;
986 	} */ *uap = v;
987 	struct file *fp;
988 	struct mbuf *m = NULL;
989 	socklen_t valsize;
990 	int error;
991 
992 	if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
993 		return (error);
994 	error = pledge_sockopt(p, 0, SCARG(uap, level), SCARG(uap, name));
995 	if (error)
996 		goto out;
997 	if (SCARG(uap, val)) {
998 		error = copyin(SCARG(uap, avalsize),
999 		    &valsize, sizeof (valsize));
1000 		if (error)
1001 			goto out;
1002 	} else
1003 		valsize = 0;
1004 	if ((error = sogetopt(fp->f_data, SCARG(uap, level),
1005 	    SCARG(uap, name), &m)) == 0 && SCARG(uap, val) && valsize &&
1006 	    m != NULL) {
1007 		if (valsize > m->m_len)
1008 			valsize = m->m_len;
1009 		error = copyout(mtod(m, caddr_t), SCARG(uap, val), valsize);
1010 		if (error == 0)
1011 			error = copyout(&valsize,
1012 			    SCARG(uap, avalsize), sizeof (valsize));
1013 	}
1014 out:
1015 	FRELE(fp, p);
1016 	if (m != NULL)
1017 		(void)m_free(m);
1018 	return (error);
1019 }
1020 
1021 /*
1022  * Get socket name.
1023  */
1024 int
1025 sys_getsockname(struct proc *p, void *v, register_t *retval)
1026 {
1027 	struct sys_getsockname_args /* {
1028 		syscallarg(int) fdes;
1029 		syscallarg(struct sockaddr *) asa;
1030 		syscallarg(socklen_t *) alen;
1031 	} */ *uap = v;
1032 	struct file *fp;
1033 	struct socket *so;
1034 	struct mbuf *m = NULL;
1035 	socklen_t len;
1036 	int error;
1037 
1038 	if ((error = getsock(p, SCARG(uap, fdes), &fp)) != 0)
1039 		return (error);
1040 	error = copyin(SCARG(uap, alen), &len, sizeof (len));
1041 	if (error)
1042 		goto bad;
1043 	so = fp->f_data;
1044 	error = pledge_socket(p, -1, so->so_state);
1045 	if (error)
1046 		goto bad;
1047 	m = m_getclr(M_WAIT, MT_SONAME);
1048 	error = (*so->so_proto->pr_usrreq)(so, PRU_SOCKADDR, 0, m, 0, p);
1049 	if (error)
1050 		goto bad;
1051 	error = copyaddrout(p, m, SCARG(uap, asa), len, SCARG(uap, alen));
1052 bad:
1053 	FRELE(fp, p);
1054 	if (m)
1055 		m_freem(m);
1056 	return (error);
1057 }
1058 
1059 /*
1060  * Get name of peer for connected socket.
1061  */
1062 int
1063 sys_getpeername(struct proc *p, void *v, register_t *retval)
1064 {
1065 	struct sys_getpeername_args /* {
1066 		syscallarg(int) fdes;
1067 		syscallarg(struct sockaddr *) asa;
1068 		syscallarg(socklen_t *) alen;
1069 	} */ *uap = v;
1070 	struct file *fp;
1071 	struct socket *so;
1072 	struct mbuf *m = NULL;
1073 	socklen_t len;
1074 	int error;
1075 
1076 	if ((error = getsock(p, SCARG(uap, fdes), &fp)) != 0)
1077 		return (error);
1078 	so = fp->f_data;
1079 	error = pledge_socket(p, -1, so->so_state);
1080 	if (error)
1081 		goto bad;
1082 	if ((so->so_state & SS_ISCONNECTED) == 0) {
1083 		error = ENOTCONN;
1084 		goto bad;
1085 	}
1086 	error = copyin(SCARG(uap, alen), &len, sizeof (len));
1087 	if (error)
1088 		goto bad;
1089 	m = m_getclr(M_WAIT, MT_SONAME);
1090 	error = (*so->so_proto->pr_usrreq)(so, PRU_PEERADDR, 0, m, 0, p);
1091 	if (error)
1092 		goto bad;
1093 	error = copyaddrout(p, m, SCARG(uap, asa), len, SCARG(uap, alen));
1094 bad:
1095 	FRELE(fp, p);
1096 	m_freem(m);
1097 	return (error);
1098 }
1099 
1100 int
1101 sockargs(struct mbuf **mp, const void *buf, size_t buflen, int type)
1102 {
1103 	struct sockaddr *sa;
1104 	struct mbuf *m;
1105 	int error;
1106 
1107 	/*
1108 	 * We can't allow socket names > UCHAR_MAX in length, since that
1109 	 * will overflow sa_len. Also, control data more than MCLBYTES in
1110 	 * length is just too much.
1111 	 */
1112 	if (buflen > (type == MT_SONAME ? UCHAR_MAX : MCLBYTES))
1113 		return (EINVAL);
1114 
1115 	/* Allocate an mbuf to hold the arguments. */
1116 	m = m_get(M_WAIT, type);
1117 	if (buflen > MLEN) {
1118 		MCLGET(m, M_WAITOK);
1119 		if ((m->m_flags & M_EXT) == 0) {
1120 			m_free(m);
1121 			return ENOBUFS;
1122 		}
1123 	}
1124 	m->m_len = buflen;
1125 	error = copyin(buf, mtod(m, caddr_t), buflen);
1126 	if (error) {
1127 		(void) m_free(m);
1128 		return (error);
1129 	}
1130 	*mp = m;
1131 	if (type == MT_SONAME) {
1132 		sa = mtod(m, struct sockaddr *);
1133 		sa->sa_len = buflen;
1134 	}
1135 	return (0);
1136 }
1137 
1138 int
1139 getsock(struct proc *p, int fdes, struct file **fpp)
1140 {
1141 	struct file *fp;
1142 
1143 	if ((fp = fd_getfile(p->p_fd, fdes)) == NULL)
1144 		return (EBADF);
1145 	if (fp->f_type != DTYPE_SOCKET)
1146 		return (ENOTSOCK);
1147 	*fpp = fp;
1148 	FREF(fp);
1149 
1150 	return (0);
1151 }
1152 
1153 int
1154 sys_setrtable(struct proc *p, void *v, register_t *retval)
1155 {
1156 	struct sys_setrtable_args /* {
1157 		syscallarg(int) rtableid;
1158 	} */ *uap = v;
1159 	int rtableid, error;
1160 
1161 	rtableid = SCARG(uap, rtableid);
1162 
1163 	if (p->p_p->ps_rtableid == (u_int)rtableid)
1164 		return (0);
1165 	if (p->p_p->ps_rtableid != 0 && (error = suser(p, 0)) != 0)
1166 		return (error);
1167 	if (rtableid < 0 || !rtable_exists((u_int)rtableid))
1168 		return (EINVAL);
1169 
1170 	p->p_p->ps_rtableid = (u_int)rtableid;
1171 	return (0);
1172 }
1173 
1174 int
1175 sys_getrtable(struct proc *p, void *v, register_t *retval)
1176 {
1177 	*retval = (int)p->p_p->ps_rtableid;
1178 	return (0);
1179 }
1180 
1181 int
1182 copyaddrout(struct proc *p, struct mbuf *name, struct sockaddr *sa,
1183     socklen_t buflen, socklen_t *outlen)
1184 {
1185 	int error;
1186 	socklen_t namelen = name->m_len;
1187 
1188 	/* SHOULD COPY OUT A CHAIN HERE */
1189 	error = copyout(mtod(name, caddr_t), sa, MIN(buflen, namelen));
1190 	if (error == 0) {
1191 #ifdef KTRACE
1192 		if (KTRPOINT(p, KTR_STRUCT))
1193 			ktrsockaddr(p, mtod(name, caddr_t), namelen);
1194 #endif
1195 		error = copyout(&namelen, outlen, sizeof(*outlen));
1196 	}
1197 
1198 	return (error);
1199 }
1200