xref: /openbsd-src/sys/kern/uipc_syscalls.c (revision 1a8dbaac879b9f3335ad7fb25429ce63ac1d6bac)
1 /*	$OpenBSD: uipc_syscalls.c,v 1.187 2020/09/29 11:48:54 claudio Exp $	*/
2 /*	$NetBSD: uipc_syscalls.c,v 1.19 1996/02/09 19:00:48 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1989, 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
33  */
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/filedesc.h>
38 #include <sys/proc.h>
39 #include <sys/fcntl.h>
40 #include <sys/file.h>
41 #include <sys/ioctl.h>
42 #include <sys/malloc.h>
43 #include <sys/event.h>
44 #include <sys/mbuf.h>
45 #include <sys/protosw.h>
46 #include <sys/socket.h>
47 #include <sys/socketvar.h>
48 #include <sys/signalvar.h>
49 #include <sys/pledge.h>
50 #include <sys/unpcb.h>
51 #include <sys/un.h>
52 #ifdef KTRACE
53 #include <sys/ktrace.h>
54 #endif
55 
56 #include <sys/mount.h>
57 #include <sys/syscallargs.h>
58 
59 #include <sys/domain.h>
60 #include <netinet/in.h>
61 #include <net/route.h>
62 
63 int	copyaddrout(struct proc *, struct mbuf *, struct sockaddr *, socklen_t,
64 	    socklen_t *);
65 
66 int
67 sys_socket(struct proc *p, void *v, register_t *retval)
68 {
69 	struct sys_socket_args /* {
70 		syscallarg(int) domain;
71 		syscallarg(int) type;
72 		syscallarg(int) protocol;
73 	} */ *uap = v;
74 	struct filedesc *fdp = p->p_fd;
75 	struct socket *so;
76 	struct file *fp;
77 	int type = SCARG(uap, type);
78 	int domain = SCARG(uap, domain);
79 	int fd, cloexec, nonblock, fflag, error;
80 	unsigned int ss = 0;
81 
82 	if ((type & SOCK_DNS) && !(domain == AF_INET || domain == AF_INET6))
83 		return (EINVAL);
84 
85 	if (ISSET(type, SOCK_DNS))
86 		ss |= SS_DNS;
87 	error = pledge_socket(p, domain, ss);
88 	if (error)
89 		return (error);
90 
91 	type &= ~(SOCK_CLOEXEC | SOCK_NONBLOCK | SOCK_DNS);
92 	cloexec = (SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0;
93 	nonblock = SCARG(uap, type) & SOCK_NONBLOCK;
94 	fflag = FREAD | FWRITE | (nonblock ? FNONBLOCK : 0);
95 
96 	error = socreate(SCARG(uap, domain), &so, type, SCARG(uap, protocol));
97 	if (error)
98 		return (error);
99 
100 	fdplock(fdp);
101 	error = falloc(p, &fp, &fd);
102 	if (error) {
103 		fdpunlock(fdp);
104 		soclose(so, MSG_DONTWAIT);
105 	} else {
106 		fp->f_flag = fflag;
107 		fp->f_type = DTYPE_SOCKET;
108 		fp->f_ops = &socketops;
109 		so->so_state |= ss;
110 		fp->f_data = so;
111 		fdinsert(fdp, fd, cloexec, fp);
112 		fdpunlock(fdp);
113 		FRELE(fp, p);
114 		*retval = fd;
115 	}
116 	return (error);
117 }
118 
119 static inline int
120 isdnssocket(struct socket *so)
121 {
122 	return (so->so_state & SS_DNS);
123 }
124 
125 /* For SS_DNS sockets, only allow port DNS (port 53) */
126 static int
127 dns_portcheck(struct proc *p, struct socket *so, void *nam, u_int *namelen)
128 {
129 	int error = EINVAL;
130 
131 	switch (so->so_proto->pr_domain->dom_family) {
132 	case AF_INET:
133 		if (*namelen < sizeof(struct sockaddr_in))
134 			break;
135 		if (((struct sockaddr_in *)nam)->sin_port == htons(53))
136 			error = 0;
137 		break;
138 #ifdef INET6
139 	case AF_INET6:
140 		if (*namelen < sizeof(struct sockaddr_in6))
141 			break;
142 		if (((struct sockaddr_in6 *)nam)->sin6_port == htons(53))
143 			error = 0;
144 #endif
145 	}
146 	if (error && p->p_p->ps_flags & PS_PLEDGE)
147 		return (pledge_fail(p, EPERM, PLEDGE_DNS));
148 	return error;
149 }
150 
151 int
152 sys_bind(struct proc *p, void *v, register_t *retval)
153 {
154 	struct sys_bind_args /* {
155 		syscallarg(int) s;
156 		syscallarg(const struct sockaddr *) name;
157 		syscallarg(socklen_t) namelen;
158 	} */ *uap = v;
159 	struct file *fp;
160 	struct mbuf *nam;
161 	struct socket *so;
162 	int s, error;
163 
164 	if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
165 		return (error);
166 	so = fp->f_data;
167 	error = pledge_socket(p, so->so_proto->pr_domain->dom_family,
168 	    so->so_state);
169 	if (error)
170 		goto out;
171 	error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
172 	    MT_SONAME);
173 	if (error)
174 		goto out;
175 #ifdef KTRACE
176 	if (KTRPOINT(p, KTR_STRUCT))
177 		ktrsockaddr(p, mtod(nam, caddr_t), SCARG(uap, namelen));
178 #endif
179 	s = solock(so);
180 	error = sobind(so, nam, p);
181 	sounlock(so, s);
182 	m_freem(nam);
183 out:
184 	FRELE(fp, p);
185 	return (error);
186 }
187 
188 int
189 sys_listen(struct proc *p, void *v, register_t *retval)
190 {
191 	struct sys_listen_args /* {
192 		syscallarg(int) s;
193 		syscallarg(int) backlog;
194 	} */ *uap = v;
195 	struct file *fp;
196 	struct socket *so;
197 	int s, error;
198 
199 	if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
200 		return (error);
201 	so = fp->f_data;
202 	s = solock(so);
203 	error = solisten(so, SCARG(uap, backlog));
204 	sounlock(so, s);
205 	FRELE(fp, p);
206 	return (error);
207 }
208 
209 int
210 sys_accept(struct proc *p, void *v, register_t *retval)
211 {
212 	struct sys_accept_args /* {
213 		syscallarg(int) s;
214 		syscallarg(struct sockaddr *) name;
215 		syscallarg(socklen_t *) anamelen;
216 	} */ *uap = v;
217 
218 	return (doaccept(p, SCARG(uap, s), SCARG(uap, name),
219 	    SCARG(uap, anamelen), SOCK_NONBLOCK_INHERIT, retval));
220 }
221 
222 int
223 sys_accept4(struct proc *p, void *v, register_t *retval)
224 {
225 	struct sys_accept4_args /* {
226 		syscallarg(int) s;
227 		syscallarg(struct sockaddr *) name;
228 		syscallarg(socklen_t *) anamelen;
229 		syscallarg(socklen_t *) int flags;
230 	} */ *uap = v;
231 
232 	if (SCARG(uap, flags) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
233 		return (EINVAL);
234 
235 	return (doaccept(p, SCARG(uap, s), SCARG(uap, name),
236 	    SCARG(uap, anamelen), SCARG(uap, flags), retval));
237 }
238 
239 int
240 doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen,
241     int flags, register_t *retval)
242 {
243 	struct filedesc *fdp = p->p_fd;
244 	struct file *fp, *headfp;
245 	struct mbuf *nam;
246 	socklen_t namelen;
247 	int error, s, tmpfd;
248 	struct socket *head, *so;
249 	int cloexec, nflag;
250 
251 	cloexec = (flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0;
252 
253 	if (name && (error = copyin(anamelen, &namelen, sizeof (namelen))))
254 		return (error);
255 	if ((error = getsock(p, sock, &fp)) != 0)
256 		return (error);
257 
258 	headfp = fp;
259 
260 	fdplock(fdp);
261 	error = falloc(p, &fp, &tmpfd);
262 	fdpunlock(fdp);
263 	if (error) {
264 		FRELE(headfp, p);
265 		return (error);
266 	}
267 
268 	nam = m_get(M_WAIT, MT_SONAME);
269 
270 	head = headfp->f_data;
271 	s = solock(head);
272 	if (isdnssocket(head) || (head->so_options & SO_ACCEPTCONN) == 0) {
273 		error = EINVAL;
274 		goto out;
275 	}
276 	if ((headfp->f_flag & FNONBLOCK) && head->so_qlen == 0) {
277 		if (head->so_state & SS_CANTRCVMORE)
278 			error = ECONNABORTED;
279 		else
280 			error = EWOULDBLOCK;
281 		goto out;
282 	}
283 	while (head->so_qlen == 0 && head->so_error == 0) {
284 		if (head->so_state & SS_CANTRCVMORE) {
285 			head->so_error = ECONNABORTED;
286 			break;
287 		}
288 		error = sosleep_nsec(head, &head->so_timeo, PSOCK | PCATCH,
289 		    "netcon", INFSLP);
290 		if (error)
291 			goto out;
292 	}
293 	if (head->so_error) {
294 		error = head->so_error;
295 		head->so_error = 0;
296 		goto out;
297 	}
298 
299 	/*
300 	 * Do not sleep after we have taken the socket out of the queue.
301 	 */
302 	so = TAILQ_FIRST(&head->so_q);
303 	if (soqremque(so, 1) == 0)
304 		panic("accept");
305 
306 	/* Figure out whether the new socket should be non-blocking. */
307 	nflag = flags & SOCK_NONBLOCK_INHERIT ? (headfp->f_flag & FNONBLOCK)
308 	    : (flags & SOCK_NONBLOCK ? FNONBLOCK : 0);
309 
310 	/* connection has been removed from the listen queue */
311 	KNOTE(&head->so_rcv.sb_sel.si_note, NOTE_SUBMIT);
312 
313 	fp->f_type = DTYPE_SOCKET;
314 	fp->f_flag = FREAD | FWRITE | nflag;
315 	fp->f_ops = &socketops;
316 	fp->f_data = so;
317 	error = soaccept(so, nam);
318 	if (!error && name != NULL)
319 		error = copyaddrout(p, nam, name, namelen, anamelen);
320 out:
321 	if (!error) {
322 		sounlock(head, s);
323 		fdplock(fdp);
324 		fdinsert(fdp, tmpfd, cloexec, fp);
325 		fdpunlock(fdp);
326 		FRELE(fp, p);
327 		*retval = tmpfd;
328 	} else {
329 		sounlock(head, s);
330 		fdplock(fdp);
331 		fdremove(fdp, tmpfd);
332 		fdpunlock(fdp);
333 		closef(fp, p);
334 	}
335 
336 	m_freem(nam);
337 	FRELE(headfp, p);
338 	return (error);
339 }
340 
341 int
342 sys_connect(struct proc *p, void *v, register_t *retval)
343 {
344 	struct sys_connect_args /* {
345 		syscallarg(int) s;
346 		syscallarg(const struct sockaddr *) name;
347 		syscallarg(socklen_t) namelen;
348 	} */ *uap = v;
349 	struct file *fp;
350 	struct socket *so;
351 	struct mbuf *nam = NULL;
352 	int error, s, interrupted = 0;
353 
354 	if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
355 		return (error);
356 	so = fp->f_data;
357 	s = solock(so);
358 	if (so->so_state & SS_ISCONNECTING) {
359 		error = EALREADY;
360 		goto out;
361 	}
362 	error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
363 	    MT_SONAME);
364 	if (error)
365 		goto out;
366 	error = pledge_socket(p, so->so_proto->pr_domain->dom_family,
367 	    so->so_state);
368 	if (error)
369 		goto out;
370 #ifdef KTRACE
371 	if (KTRPOINT(p, KTR_STRUCT))
372 		ktrsockaddr(p, mtod(nam, caddr_t), SCARG(uap, namelen));
373 #endif
374 
375 	if (isdnssocket(so)) {
376 		u_int namelen = nam->m_len;
377 		error = dns_portcheck(p, so, mtod(nam, void *), &namelen);
378 		if (error)
379 			goto out;
380 		nam->m_len = namelen;
381 	}
382 
383 	error = soconnect(so, nam);
384 	if (error)
385 		goto bad;
386 	if ((fp->f_flag & FNONBLOCK) && (so->so_state & SS_ISCONNECTING)) {
387 		error = EINPROGRESS;
388 		goto out;
389 	}
390 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
391 		error = sosleep_nsec(so, &so->so_timeo, PSOCK | PCATCH,
392 		    "netcon2", INFSLP);
393 		if (error) {
394 			if (error == EINTR || error == ERESTART)
395 				interrupted = 1;
396 			break;
397 		}
398 	}
399 	if (error == 0) {
400 		error = so->so_error;
401 		so->so_error = 0;
402 	}
403 bad:
404 	if (!interrupted)
405 		so->so_state &= ~SS_ISCONNECTING;
406 out:
407 	sounlock(so, s);
408 	FRELE(fp, p);
409 	m_freem(nam);
410 	if (error == ERESTART)
411 		error = EINTR;
412 	return (error);
413 }
414 
415 int
416 sys_socketpair(struct proc *p, void *v, register_t *retval)
417 {
418 	struct sys_socketpair_args /* {
419 		syscallarg(int) domain;
420 		syscallarg(int) type;
421 		syscallarg(int) protocol;
422 		syscallarg(int *) rsv;
423 	} */ *uap = v;
424 	struct filedesc *fdp = p->p_fd;
425 	struct file *fp1 = NULL, *fp2 = NULL;
426 	struct socket *so1, *so2;
427 	int type, cloexec, nonblock, fflag, error, sv[2];
428 
429 	type  = SCARG(uap, type) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK);
430 	cloexec = (SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0;
431 	nonblock = SCARG(uap, type) & SOCK_NONBLOCK;
432 	fflag = FREAD | FWRITE | (nonblock ? FNONBLOCK : 0);
433 
434 	error = socreate(SCARG(uap, domain), &so1, type, SCARG(uap, protocol));
435 	if (error)
436 		return (error);
437 	error = socreate(SCARG(uap, domain), &so2, type, SCARG(uap, protocol));
438 	if (error)
439 		goto free1;
440 
441 	error = soconnect2(so1, so2);
442 	if (error != 0)
443 		goto free2;
444 
445 	if ((SCARG(uap, type) & SOCK_TYPE_MASK) == SOCK_DGRAM) {
446 		/*
447 		 * Datagram socket connection is asymmetric.
448 		 */
449 		error = soconnect2(so2, so1);
450 		if (error != 0)
451 			goto free2;
452 	}
453 	fdplock(fdp);
454 	if ((error = falloc(p, &fp1, &sv[0])) != 0)
455 		goto free3;
456 	fp1->f_flag = fflag;
457 	fp1->f_type = DTYPE_SOCKET;
458 	fp1->f_ops = &socketops;
459 	fp1->f_data = so1;
460 	if ((error = falloc(p, &fp2, &sv[1])) != 0)
461 		goto free4;
462 	fp2->f_flag = fflag;
463 	fp2->f_type = DTYPE_SOCKET;
464 	fp2->f_ops = &socketops;
465 	fp2->f_data = so2;
466 	error = copyout(sv, SCARG(uap, rsv), 2 * sizeof (int));
467 	if (error == 0) {
468 #ifdef KTRACE
469 		if (KTRPOINT(p, KTR_STRUCT))
470 			ktrfds(p, sv, 2);
471 #endif
472 		fdinsert(fdp, sv[0], cloexec, fp1);
473 		fdinsert(fdp, sv[1], cloexec, fp2);
474 		fdpunlock(fdp);
475 		FRELE(fp1, p);
476 		FRELE(fp2, p);
477 		return (0);
478 	}
479 	fdremove(fdp, sv[1]);
480 free4:
481 	fdremove(fdp, sv[0]);
482 free3:
483 	fdpunlock(fdp);
484 
485 	if (fp2 != NULL) {
486 		closef(fp2, p);
487 		so2 = NULL;
488 	}
489 	if (fp1 != NULL) {
490 		closef(fp1, p);
491 		so1 = NULL;
492 	}
493 free2:
494 	if (so2 != NULL)
495 		(void)soclose(so2, 0);
496 free1:
497 	if (so1 != NULL)
498 		(void)soclose(so1, 0);
499 	return (error);
500 }
501 
502 int
503 sys_sendto(struct proc *p, void *v, register_t *retval)
504 {
505 	struct sys_sendto_args /* {
506 		syscallarg(int) s;
507 		syscallarg(const void *) buf;
508 		syscallarg(size_t) len;
509 		syscallarg(int) flags;
510 		syscallarg(const struct sockaddr *) to;
511 		syscallarg(socklen_t) tolen;
512 	} */ *uap = v;
513 	struct msghdr msg;
514 	struct iovec aiov;
515 
516 	msg.msg_name = (caddr_t)SCARG(uap, to);
517 	msg.msg_namelen = SCARG(uap, tolen);
518 	msg.msg_iov = &aiov;
519 	msg.msg_iovlen = 1;
520 	msg.msg_control = 0;
521 	msg.msg_flags = 0;
522 	aiov.iov_base = (char *)SCARG(uap, buf);
523 	aiov.iov_len = SCARG(uap, len);
524 	return (sendit(p, SCARG(uap, s), &msg, SCARG(uap, flags), retval));
525 }
526 
527 int
528 sys_sendmsg(struct proc *p, void *v, register_t *retval)
529 {
530 	struct sys_sendmsg_args /* {
531 		syscallarg(int) s;
532 		syscallarg(const struct msghdr *) msg;
533 		syscallarg(int) flags;
534 	} */ *uap = v;
535 	struct msghdr msg;
536 	struct iovec aiov[UIO_SMALLIOV], *iov;
537 	int error;
538 
539 	error = copyin(SCARG(uap, msg), &msg, sizeof (msg));
540 	if (error)
541 		return (error);
542 #ifdef KTRACE
543 	if (KTRPOINT(p, KTR_STRUCT))
544 		ktrmsghdr(p, &msg);
545 #endif
546 
547 	if (msg.msg_iovlen > IOV_MAX)
548 		return (EMSGSIZE);
549 	if (msg.msg_iovlen > UIO_SMALLIOV)
550 		iov = mallocarray(msg.msg_iovlen, sizeof(struct iovec),
551 		    M_IOV, M_WAITOK);
552 	else
553 		iov = aiov;
554 	if (msg.msg_iovlen &&
555 	    (error = copyin(msg.msg_iov, iov,
556 		    msg.msg_iovlen * sizeof (struct iovec))))
557 		goto done;
558 #ifdef KTRACE
559 	if (msg.msg_iovlen && KTRPOINT(p, KTR_STRUCT))
560 		ktriovec(p, iov, msg.msg_iovlen);
561 #endif
562 	msg.msg_iov = iov;
563 	msg.msg_flags = 0;
564 	error = sendit(p, SCARG(uap, s), &msg, SCARG(uap, flags), retval);
565 done:
566 	if (iov != aiov)
567 		free(iov, M_IOV, sizeof(struct iovec) * msg.msg_iovlen);
568 	return (error);
569 }
570 
571 int
572 sendit(struct proc *p, int s, struct msghdr *mp, int flags, register_t *retsize)
573 {
574 	struct file *fp;
575 	struct uio auio;
576 	struct iovec *iov;
577 	int i;
578 	struct mbuf *to, *control;
579 	struct socket *so;
580 	size_t len;
581 	int error;
582 #ifdef KTRACE
583 	struct iovec *ktriov = NULL;
584 	int iovlen = 0;
585 #endif
586 
587 	to = NULL;
588 
589 	if ((error = getsock(p, s, &fp)) != 0)
590 		return (error);
591 	so = fp->f_data;
592 	if (fp->f_flag & FNONBLOCK)
593 		flags |= MSG_DONTWAIT;
594 
595 	error = pledge_sendit(p, mp->msg_name);
596 	if (error)
597 		goto bad;
598 
599 	auio.uio_iov = mp->msg_iov;
600 	auio.uio_iovcnt = mp->msg_iovlen;
601 	auio.uio_segflg = UIO_USERSPACE;
602 	auio.uio_rw = UIO_WRITE;
603 	auio.uio_procp = p;
604 	auio.uio_offset = 0;			/* XXX */
605 	auio.uio_resid = 0;
606 	iov = mp->msg_iov;
607 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
608 		/* Don't allow sum > SSIZE_MAX */
609 		if (iov->iov_len > SSIZE_MAX ||
610 		    (auio.uio_resid += iov->iov_len) > SSIZE_MAX) {
611 			error = EINVAL;
612 			goto bad;
613 		}
614 	}
615 	if (mp->msg_name) {
616 		error = sockargs(&to, mp->msg_name, mp->msg_namelen,
617 		    MT_SONAME);
618 		if (error)
619 			goto bad;
620 		if (isdnssocket(so)) {
621 			u_int namelen = mp->msg_namelen;
622 			error = dns_portcheck(p, so, mtod(to, caddr_t),
623 			    &namelen);
624 			if (error)
625 				goto bad;
626 			mp->msg_namelen = namelen;
627 		}
628 #ifdef KTRACE
629 		if (KTRPOINT(p, KTR_STRUCT))
630 			ktrsockaddr(p, mtod(to, caddr_t), mp->msg_namelen);
631 #endif
632 	}
633 	if (mp->msg_control) {
634 		if (mp->msg_controllen < CMSG_ALIGN(sizeof(struct cmsghdr))) {
635 			error = EINVAL;
636 			goto bad;
637 		}
638 		error = sockargs(&control, mp->msg_control,
639 		    mp->msg_controllen, MT_CONTROL);
640 		if (error)
641 			goto bad;
642 #ifdef KTRACE
643 		if (KTRPOINT(p, KTR_STRUCT) && mp->msg_controllen)
644 			ktrcmsghdr(p, mtod(control, char *),
645 			    mp->msg_controllen);
646 #endif
647 	} else
648 		control = 0;
649 #ifdef KTRACE
650 	if (KTRPOINT(p, KTR_GENIO)) {
651 		ktriov = mallocarray(auio.uio_iovcnt, sizeof(struct iovec),
652 		    M_TEMP, M_WAITOK);
653 		iovlen = auio.uio_iovcnt * sizeof (struct iovec);
654 
655 		memcpy(ktriov, auio.uio_iov, iovlen);
656 	}
657 #endif
658 	len = auio.uio_resid;
659 	error = sosend(so, to, &auio, NULL, control, flags);
660 	if (error) {
661 		if (auio.uio_resid != len && (error == ERESTART ||
662 		    error == EINTR || error == EWOULDBLOCK))
663 			error = 0;
664 		if (error == EPIPE && (flags & MSG_NOSIGNAL) == 0) {
665 			KERNEL_LOCK();
666 			ptsignal(p, SIGPIPE, STHREAD);
667 			KERNEL_UNLOCK();
668 		}
669 	}
670 	if (error == 0) {
671 		*retsize = len - auio.uio_resid;
672 		mtx_enter(&fp->f_mtx);
673 		fp->f_wxfer++;
674 		fp->f_wbytes += *retsize;
675 		mtx_leave(&fp->f_mtx);
676 	}
677 #ifdef KTRACE
678 	if (ktriov != NULL) {
679 		if (error == 0)
680 			ktrgenio(p, s, UIO_WRITE, ktriov, *retsize);
681 		free(ktriov, M_TEMP, iovlen);
682 	}
683 #endif
684 bad:
685 	FRELE(fp, p);
686 	m_freem(to);
687 	return (error);
688 }
689 
690 int
691 sys_recvfrom(struct proc *p, void *v, register_t *retval)
692 {
693 	struct sys_recvfrom_args /* {
694 		syscallarg(int) s;
695 		syscallarg(void *) buf;
696 		syscallarg(size_t) len;
697 		syscallarg(int) flags;
698 		syscallarg(struct sockaddr *) from;
699 		syscallarg(socklen_t *) fromlenaddr;
700 	} */ *uap = v;
701 	struct msghdr msg;
702 	struct iovec aiov;
703 	int error;
704 
705 	if (SCARG(uap, fromlenaddr)) {
706 		error = copyin(SCARG(uap, fromlenaddr),
707 		    &msg.msg_namelen, sizeof (msg.msg_namelen));
708 		if (error)
709 			return (error);
710 	} else
711 		msg.msg_namelen = 0;
712 	msg.msg_name = (caddr_t)SCARG(uap, from);
713 	msg.msg_iov = &aiov;
714 	msg.msg_iovlen = 1;
715 	aiov.iov_base = SCARG(uap, buf);
716 	aiov.iov_len = SCARG(uap, len);
717 	msg.msg_control = 0;
718 	msg.msg_flags = SCARG(uap, flags);
719 	return (recvit(p, SCARG(uap, s), &msg,
720 	    (caddr_t)SCARG(uap, fromlenaddr), retval));
721 }
722 
723 int
724 sys_recvmsg(struct proc *p, void *v, register_t *retval)
725 {
726 	struct sys_recvmsg_args /* {
727 		syscallarg(int) s;
728 		syscallarg(struct msghdr *) msg;
729 		syscallarg(int) flags;
730 	} */ *uap = v;
731 	struct msghdr msg;
732 	struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
733 	int error;
734 
735 	error = copyin(SCARG(uap, msg), &msg, sizeof (msg));
736 	if (error)
737 		return (error);
738 
739 	if (msg.msg_iovlen > IOV_MAX)
740 		return (EMSGSIZE);
741 	if (msg.msg_iovlen > UIO_SMALLIOV)
742 		iov = mallocarray(msg.msg_iovlen, sizeof(struct iovec),
743 		    M_IOV, M_WAITOK);
744 	else
745 		iov = aiov;
746 	msg.msg_flags = SCARG(uap, flags);
747 	if (msg.msg_iovlen > 0) {
748 		error = copyin(msg.msg_iov, iov,
749 		    msg.msg_iovlen * sizeof(struct iovec));
750 		if (error)
751 			goto done;
752 	}
753 	uiov = msg.msg_iov;
754 	msg.msg_iov = iov;
755 	if ((error = recvit(p, SCARG(uap, s), &msg, NULL, retval)) == 0) {
756 		msg.msg_iov = uiov;
757 #ifdef KTRACE
758 		if (KTRPOINT(p, KTR_STRUCT)) {
759 			ktrmsghdr(p, &msg);
760 			if (msg.msg_iovlen)
761 				ktriovec(p, iov, msg.msg_iovlen);
762 		}
763 #endif
764 		error = copyout(&msg, SCARG(uap, msg), sizeof(msg));
765 	}
766 done:
767 	if (iov != aiov)
768 		free(iov, M_IOV, sizeof(struct iovec) * msg.msg_iovlen);
769 	return (error);
770 }
771 
772 int
773 recvit(struct proc *p, int s, struct msghdr *mp, caddr_t namelenp,
774     register_t *retsize)
775 {
776 	struct file *fp;
777 	struct uio auio;
778 	struct iovec *iov;
779 	int i;
780 	size_t len;
781 	int error;
782 	struct mbuf *from = NULL, *control = NULL;
783 #ifdef KTRACE
784 	struct iovec *ktriov = NULL;
785 	int iovlen = 0;
786 #endif
787 
788 	if ((error = getsock(p, s, &fp)) != 0)
789 		return (error);
790 
791 	auio.uio_iov = mp->msg_iov;
792 	auio.uio_iovcnt = mp->msg_iovlen;
793 	auio.uio_segflg = UIO_USERSPACE;
794 	auio.uio_rw = UIO_READ;
795 	auio.uio_procp = p;
796 	auio.uio_offset = 0;			/* XXX */
797 	auio.uio_resid = 0;
798 	iov = mp->msg_iov;
799 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
800 		/* Don't allow sum > SSIZE_MAX */
801 		if (iov->iov_len > SSIZE_MAX ||
802 		    (auio.uio_resid += iov->iov_len) > SSIZE_MAX) {
803 			error = EINVAL;
804 			goto out;
805 		}
806 	}
807 #ifdef KTRACE
808 	if (KTRPOINT(p, KTR_GENIO)) {
809 		ktriov = mallocarray(auio.uio_iovcnt, sizeof(struct iovec),
810 		    M_TEMP, M_WAITOK);
811 		iovlen = auio.uio_iovcnt * sizeof (struct iovec);
812 
813 		memcpy(ktriov, auio.uio_iov, iovlen);
814 	}
815 #endif
816 	len = auio.uio_resid;
817 	if (fp->f_flag & FNONBLOCK)
818 		mp->msg_flags |= MSG_DONTWAIT;
819 	error = soreceive(fp->f_data, &from, &auio, NULL,
820 			  mp->msg_control ? &control : NULL,
821 			  &mp->msg_flags,
822 			  mp->msg_control ? mp->msg_controllen : 0);
823 	if (error) {
824 		if (auio.uio_resid != len && (error == ERESTART ||
825 		    error == EINTR || error == EWOULDBLOCK))
826 			error = 0;
827 	}
828 #ifdef KTRACE
829 	if (ktriov != NULL) {
830 		if (error == 0)
831 			ktrgenio(p, s, UIO_READ, ktriov, len - auio.uio_resid);
832 		free(ktriov, M_TEMP, iovlen);
833 	}
834 #endif
835 	if (error)
836 		goto out;
837 	*retsize = len - auio.uio_resid;
838 	if (mp->msg_name) {
839 		socklen_t alen;
840 
841 		if (from == NULL)
842 			alen = 0;
843 		else {
844 			alen = from->m_len;
845 			error = copyout(mtod(from, caddr_t), mp->msg_name,
846 			    MIN(alen, mp->msg_namelen));
847 			if (error)
848 				goto out;
849 #ifdef KTRACE
850 			if (KTRPOINT(p, KTR_STRUCT))
851 				ktrsockaddr(p, mtod(from, caddr_t), alen);
852 #endif
853 		}
854 		mp->msg_namelen = alen;
855 		if (namelenp &&
856 		    (error = copyout(&alen, namelenp, sizeof(alen)))) {
857 			goto out;
858 		}
859 	}
860 	if (mp->msg_control) {
861 		len = mp->msg_controllen;
862 		if (len <= 0 || control == NULL)
863 			len = 0;
864 		else {
865 			struct mbuf *m = control;
866 			caddr_t cp = mp->msg_control;
867 
868 			do {
869 				i = m->m_len;
870 				if (len < i) {
871 					mp->msg_flags |= MSG_CTRUNC;
872 					i = len;
873 				}
874 				error = copyout(mtod(m, caddr_t), cp, i);
875 #ifdef KTRACE
876 				if (KTRPOINT(p, KTR_STRUCT) && error == 0 && i)
877 					ktrcmsghdr(p, mtod(m, char *), i);
878 #endif
879 				if (m->m_next)
880 					i = ALIGN(i);
881 				cp += i;
882 				len -= i;
883 				if (error != 0 || len <= 0)
884 					break;
885 			} while ((m = m->m_next) != NULL);
886 			len = cp - (caddr_t)mp->msg_control;
887 		}
888 		mp->msg_controllen = len;
889 	}
890 	if (!error) {
891 		mtx_enter(&fp->f_mtx);
892 		fp->f_rxfer++;
893 		fp->f_rbytes += *retsize;
894 		mtx_leave(&fp->f_mtx);
895 	}
896 out:
897 	FRELE(fp, p);
898 	m_freem(from);
899 	m_freem(control);
900 	return (error);
901 }
902 
903 int
904 sys_shutdown(struct proc *p, void *v, register_t *retval)
905 {
906 	struct sys_shutdown_args /* {
907 		syscallarg(int) s;
908 		syscallarg(int) how;
909 	} */ *uap = v;
910 	struct file *fp;
911 	int error;
912 
913 	if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
914 		return (error);
915 	error = soshutdown(fp->f_data, SCARG(uap, how));
916 	FRELE(fp, p);
917 	return (error);
918 }
919 
920 int
921 sys_setsockopt(struct proc *p, void *v, register_t *retval)
922 {
923 	struct sys_setsockopt_args /* {
924 		syscallarg(int) s;
925 		syscallarg(int) level;
926 		syscallarg(int) name;
927 		syscallarg(const void *) val;
928 		syscallarg(socklen_t) valsize;
929 	} */ *uap = v;
930 	struct file *fp;
931 	struct mbuf *m = NULL;
932 	struct socket *so;
933 	int s, error;
934 
935 
936 	if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
937 		return (error);
938 	error = pledge_sockopt(p, 1, SCARG(uap, level), SCARG(uap, name));
939 	if (error)
940 		goto bad;
941 	if (SCARG(uap, valsize) > MCLBYTES) {
942 		error = EINVAL;
943 		goto bad;
944 	}
945 	if (SCARG(uap, val)) {
946 		m = m_get(M_WAIT, MT_SOOPTS);
947 		if (SCARG(uap, valsize) > MLEN) {
948 			MCLGET(m, M_DONTWAIT);
949 			if ((m->m_flags & M_EXT) == 0) {
950 				error = ENOBUFS;
951 				goto bad;
952 			}
953 		}
954 		if (m == NULL) {
955 			error = ENOBUFS;
956 			goto bad;
957 		}
958 		error = copyin(SCARG(uap, val), mtod(m, caddr_t),
959 		    SCARG(uap, valsize));
960 		if (error) {
961 			goto bad;
962 		}
963 		m->m_len = SCARG(uap, valsize);
964 	}
965 	so = fp->f_data;
966 	s = solock(so);
967 	error = sosetopt(so, SCARG(uap, level), SCARG(uap, name), m);
968 	sounlock(so, s);
969 bad:
970 	m_freem(m);
971 	FRELE(fp, p);
972 	return (error);
973 }
974 
975 int
976 sys_getsockopt(struct proc *p, void *v, register_t *retval)
977 {
978 	struct sys_getsockopt_args /* {
979 		syscallarg(int) s;
980 		syscallarg(int) level;
981 		syscallarg(int) name;
982 		syscallarg(void *) val;
983 		syscallarg(socklen_t *) avalsize;
984 	} */ *uap = v;
985 	struct file *fp;
986 	struct mbuf *m = NULL;
987 	socklen_t valsize;
988 	struct socket *so;
989 	int s, error;
990 
991 	if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
992 		return (error);
993 	error = pledge_sockopt(p, 0, SCARG(uap, level), SCARG(uap, name));
994 	if (error)
995 		goto out;
996 	if (SCARG(uap, val)) {
997 		error = copyin(SCARG(uap, avalsize),
998 		    &valsize, sizeof (valsize));
999 		if (error)
1000 			goto out;
1001 	} else
1002 		valsize = 0;
1003 	m = m_get(M_WAIT, MT_SOOPTS);
1004 	so = fp->f_data;
1005 	s = solock(so);
1006 	error = sogetopt(so, SCARG(uap, level), SCARG(uap, name), m);
1007 	sounlock(so, s);
1008 	if (error == 0 && SCARG(uap, val) && valsize && m != NULL) {
1009 		if (valsize > m->m_len)
1010 			valsize = m->m_len;
1011 		error = copyout(mtod(m, caddr_t), SCARG(uap, val), valsize);
1012 		if (error == 0)
1013 			error = copyout(&valsize,
1014 			    SCARG(uap, avalsize), sizeof (valsize));
1015 	}
1016 	m_free(m);
1017 out:
1018 	FRELE(fp, p);
1019 	return (error);
1020 }
1021 
1022 /*
1023  * Get socket name.
1024  */
1025 int
1026 sys_getsockname(struct proc *p, void *v, register_t *retval)
1027 {
1028 	struct sys_getsockname_args /* {
1029 		syscallarg(int) fdes;
1030 		syscallarg(struct sockaddr *) asa;
1031 		syscallarg(socklen_t *) alen;
1032 	} */ *uap = v;
1033 	struct file *fp;
1034 	struct socket *so;
1035 	struct mbuf *m = NULL;
1036 	socklen_t len;
1037 	int error, s;
1038 
1039 	if ((error = getsock(p, SCARG(uap, fdes), &fp)) != 0)
1040 		return (error);
1041 	error = copyin(SCARG(uap, alen), &len, sizeof (len));
1042 	if (error)
1043 		goto bad;
1044 	so = fp->f_data;
1045 	error = pledge_socket(p, -1, so->so_state);
1046 	if (error)
1047 		goto bad;
1048 	m = m_getclr(M_WAIT, MT_SONAME);
1049 	s = solock(so);
1050 	error = (*so->so_proto->pr_usrreq)(so, PRU_SOCKADDR, 0, m, 0, p);
1051 	sounlock(so, s);
1052 	if (error)
1053 		goto bad;
1054 	error = copyaddrout(p, m, SCARG(uap, asa), len, SCARG(uap, alen));
1055 bad:
1056 	FRELE(fp, p);
1057 	m_freem(m);
1058 	return (error);
1059 }
1060 
1061 /*
1062  * Get name of peer for connected socket.
1063  */
1064 int
1065 sys_getpeername(struct proc *p, void *v, register_t *retval)
1066 {
1067 	struct sys_getpeername_args /* {
1068 		syscallarg(int) fdes;
1069 		syscallarg(struct sockaddr *) asa;
1070 		syscallarg(socklen_t *) alen;
1071 	} */ *uap = v;
1072 	struct file *fp;
1073 	struct socket *so;
1074 	struct mbuf *m = NULL;
1075 	socklen_t len;
1076 	int error, s;
1077 
1078 	if ((error = getsock(p, SCARG(uap, fdes), &fp)) != 0)
1079 		return (error);
1080 	so = fp->f_data;
1081 	error = pledge_socket(p, -1, so->so_state);
1082 	if (error)
1083 		goto bad;
1084 	if ((so->so_state & SS_ISCONNECTED) == 0) {
1085 		error = ENOTCONN;
1086 		goto bad;
1087 	}
1088 	error = copyin(SCARG(uap, alen), &len, sizeof (len));
1089 	if (error)
1090 		goto bad;
1091 	m = m_getclr(M_WAIT, MT_SONAME);
1092 	s = solock(so);
1093 	error = (*so->so_proto->pr_usrreq)(so, PRU_PEERADDR, 0, m, 0, p);
1094 	sounlock(so, s);
1095 	if (error)
1096 		goto bad;
1097 	error = copyaddrout(p, m, SCARG(uap, asa), len, SCARG(uap, alen));
1098 bad:
1099 	FRELE(fp, p);
1100 	m_freem(m);
1101 	return (error);
1102 }
1103 
1104 int
1105 sockargs(struct mbuf **mp, const void *buf, size_t buflen, int type)
1106 {
1107 	struct sockaddr *sa;
1108 	struct mbuf *m;
1109 	int error;
1110 
1111 	/*
1112 	 * We can't allow socket names > UCHAR_MAX in length, since that
1113 	 * will overflow sa_len. Also, control data more than MCLBYTES in
1114 	 * length is just too much.
1115 	 * Memory for sa_len and sa_family must exist.
1116 	 */
1117 	if ((buflen > (type == MT_SONAME ? UCHAR_MAX : MCLBYTES)) ||
1118 	    (type == MT_SONAME && buflen < offsetof(struct sockaddr, sa_data)))
1119 		return (EINVAL);
1120 
1121 	/* Allocate an mbuf to hold the arguments. */
1122 	m = m_get(M_WAIT, type);
1123 	if (buflen > MLEN) {
1124 		MCLGET(m, M_WAITOK);
1125 		if ((m->m_flags & M_EXT) == 0) {
1126 			m_free(m);
1127 			return ENOBUFS;
1128 		}
1129 	}
1130 	m->m_len = buflen;
1131 	error = copyin(buf, mtod(m, caddr_t), buflen);
1132 	if (error) {
1133 		(void) m_free(m);
1134 		return (error);
1135 	}
1136 	*mp = m;
1137 	if (type == MT_SONAME) {
1138 		sa = mtod(m, struct sockaddr *);
1139 		sa->sa_len = buflen;
1140 	}
1141 	return (0);
1142 }
1143 
1144 int
1145 getsock(struct proc *p, int fdes, struct file **fpp)
1146 {
1147 	struct file *fp;
1148 
1149 	fp = fd_getfile(p->p_fd, fdes);
1150 	if (fp == NULL)
1151 		return (EBADF);
1152 	if (fp->f_type != DTYPE_SOCKET) {
1153 		FRELE(fp, p);
1154 		return (ENOTSOCK);
1155 	}
1156 	*fpp = fp;
1157 
1158 	return (0);
1159 }
1160 
1161 int
1162 sys_setrtable(struct proc *p, void *v, register_t *retval)
1163 {
1164 	struct sys_setrtable_args /* {
1165 		syscallarg(int) rtableid;
1166 	} */ *uap = v;
1167 	int rtableid, error;
1168 
1169 	rtableid = SCARG(uap, rtableid);
1170 
1171 	if (p->p_p->ps_rtableid == (u_int)rtableid)
1172 		return (0);
1173 	if (p->p_p->ps_rtableid != 0 && (error = suser(p)) != 0)
1174 		return (error);
1175 	if (rtableid < 0 || !rtable_exists((u_int)rtableid))
1176 		return (EINVAL);
1177 
1178 	p->p_p->ps_rtableid = (u_int)rtableid;
1179 	return (0);
1180 }
1181 
1182 int
1183 sys_getrtable(struct proc *p, void *v, register_t *retval)
1184 {
1185 	*retval = (int)p->p_p->ps_rtableid;
1186 	return (0);
1187 }
1188 
1189 int
1190 copyaddrout(struct proc *p, struct mbuf *name, struct sockaddr *sa,
1191     socklen_t buflen, socklen_t *outlen)
1192 {
1193 	int error;
1194 	socklen_t namelen = name->m_len;
1195 
1196 	/* SHOULD COPY OUT A CHAIN HERE */
1197 	error = copyout(mtod(name, caddr_t), sa, MIN(buflen, namelen));
1198 	if (error == 0) {
1199 #ifdef KTRACE
1200 		if (KTRPOINT(p, KTR_STRUCT))
1201 			ktrsockaddr(p, mtod(name, caddr_t), namelen);
1202 #endif
1203 		error = copyout(&namelen, outlen, sizeof(*outlen));
1204 	}
1205 
1206 	return (error);
1207 }
1208