xref: /openbsd-src/sys/kern/uipc_usrreq.c (revision f763167468dba5339ed4b14b7ecaca2a397ab0f6)
1 /*	$OpenBSD: uipc_usrreq.c,v 1.119 2017/08/11 19:53:02 bluhm Exp $	*/
2 /*	$NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1989, 1991, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
33  */
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/proc.h>
38 #include <sys/filedesc.h>
39 #include <sys/domain.h>
40 #include <sys/protosw.h>
41 #include <sys/queue.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/unpcb.h>
45 #include <sys/un.h>
46 #include <sys/namei.h>
47 #include <sys/vnode.h>
48 #include <sys/file.h>
49 #include <sys/stat.h>
50 #include <sys/mbuf.h>
51 #include <sys/task.h>
52 #include <sys/pledge.h>
53 
void	uipc_setaddr(const struct unpcb *, struct mbuf *);

/* list of all UNIX domain sockets, for unp_gc() */
LIST_HEAD(unp_head, unpcb) unp_head = LIST_HEAD_INITIALIZER(unp_head);

/*
 * Stack of sets of files that were passed over a socket but were
 * not received and need to be closed.
 */
struct	unp_deferral {
	SLIST_ENTRY(unp_deferral)	ud_link;	/* entry on unp_deferred */
	int	ud_n;			/* number of entries in ud_fp[] */
	/* followed by ud_n struct fdpass */
	struct fdpass ud_fp[];
};

void	unp_discard(struct fdpass *, int);
void	unp_mark(struct fdpass *, int);
void	unp_scan(struct mbuf *, void (*)(struct fdpass *, int));
int	unp_nam2sun(struct mbuf *, struct sockaddr_un **, size_t *);

/* list of sets of files that were sent over sockets that are now closed */
SLIST_HEAD(,unp_deferral) unp_deferred = SLIST_HEAD_INITIALIZER(unp_deferred);

/* deferred work: runs the fd-passing garbage collector, unp_gc() */
struct task unp_gc_task = TASK_INITIALIZER(unp_gc, NULL);


/*
 * Unix communications domain.
 *
 * TODO:
 *	RDM
 *	rethink name space problems
 *	need a proper out-of-band
 */
struct	sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };
ino_t	unp_ino;			/* prototype for fake inode numbers */
91 
92 void
93 uipc_setaddr(const struct unpcb *unp, struct mbuf *nam)
94 {
95 	if (unp != NULL && unp->unp_addr != NULL) {
96 		nam->m_len = unp->unp_addr->m_len;
97 		memcpy(mtod(nam, caddr_t), mtod(unp->unp_addr, caddr_t),
98 		    nam->m_len);
99 	} else {
100 		nam->m_len = sizeof(sun_noname);
101 		memcpy(mtod(nam, struct sockaddr *), &sun_noname,
102 		    nam->m_len);
103 	}
104 }
105 
/*
 * Protocol user-request handler for AF_UNIX sockets: dispatches the
 * generic PRU_* requests from the socket layer to the unp_* routines
 * below.  The meaning of m, nam and control depends on the request;
 * any mbufs not consumed by the request are freed at "release".
 */
int
uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control, struct proc *p)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	int error = 0;

	if (req == PRU_CONTROL)
		return (EOPNOTSUPP);
	/* Control data (SCM_RIGHTS) is only meaningful with PRU_SEND. */
	if (req != PRU_SEND && control && control->m_len) {
		error = EOPNOTSUPP;
		goto release;
	}
	if (unp == NULL) {
		error = EINVAL;
		goto release;
	}

	NET_ASSERT_UNLOCKED();

	switch (req) {

	case PRU_DETACH:
		unp_detach(unp);
		break;

	case PRU_BIND:
		error = unp_bind(unp, nam, p);
		break;

	case PRU_LISTEN:
		/* listen(2) is only valid once bound to a path. */
		if (unp->unp_vnode == NULL)
			error = EINVAL;
		break;

	case PRU_CONNECT:
		error = unp_connect(so, nam, p);
		break;

	case PRU_CONNECT2:
		/* socketpair(2): nam actually carries the second socket. */
		error = unp_connect2(so, (struct socket *)nam);
		break;

	case PRU_DISCONNECT:
		unp_disconnect(unp);
		break;

	case PRU_ACCEPT:
		/*
		 * Pass back name of connected socket,
		 * if it was bound and we are still connected
		 * (our peer may have closed already!).
		 */
		uipc_setaddr(unp->unp_conn, nam);
		break;

	case PRU_SHUTDOWN:
		socantsendmore(so);
		unp_shutdown(unp);
		break;

	case PRU_RCVD:
		switch (so->so_type) {

		case SOCK_DGRAM:
			/* datagram sockets never generate PRU_RCVD */
			panic("uipc 1");
			/*NOTREACHED*/

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
#define	rcv (&so->so_rcv)
#define snd (&so2->so_snd)
			if (unp->unp_conn == NULL)
				break;
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Adjust backpressure on sender
			 * and wakeup any waiting to write.
			 */
			snd->sb_mbcnt = rcv->sb_mbcnt;
			snd->sb_cc = rcv->sb_cc;
			sowwakeup(so2);
#undef snd
#undef rcv
			break;

		default:
			panic("uipc 2");
		}
		break;

	case PRU_SEND:
		/* Convert fd numbers in control data into file pointers. */
		if (control && (error = unp_internalize(control, p)))
			break;
		switch (so->so_type) {

		case SOCK_DGRAM: {
			struct sockaddr *from;

			if (nam) {
				/* sendto(2) with address: temporary connect */
				if (unp->unp_conn) {
					error = EISCONN;
					break;
				}
				error = unp_connect(so, nam, p);
				if (error)
					break;
			} else {
				if (unp->unp_conn == NULL) {
					error = ENOTCONN;
					break;
				}
			}
			so2 = unp->unp_conn->unp_socket;
			if (unp->unp_addr)
				from = mtod(unp->unp_addr, struct sockaddr *);
			else
				from = &sun_noname;
			if (sbappendaddr(so2, &so2->so_rcv, from, m, control)) {
				sorwakeup(so2);
				/* mbufs now owned by the receive buffer */
				m = NULL;
				control = NULL;
			} else
				error = ENOBUFS;
			if (nam)
				unp_disconnect(unp);
			break;
		}

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
#define	rcv (&so2->so_rcv)
#define	snd (&so->so_snd)
			if (so->so_state & SS_CANTSENDMORE) {
				error = EPIPE;
				break;
			}
			if (unp->unp_conn == NULL) {
				error = ENOTCONN;
				break;
			}
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Send to paired receive port, and then raise
			 * send buffer counts to maintain backpressure.
			 * Wake up readers.
			 */
			if (control) {
				if (sbappendcontrol(so2, rcv, m, control))
					control = NULL;
				else {
					error = ENOBUFS;
					break;
				}
			} else if (so->so_type == SOCK_SEQPACKET)
				sbappendrecord(so2, rcv, m);
			else
				sbappend(so2, rcv, m);
			snd->sb_mbcnt = rcv->sb_mbcnt;
			snd->sb_cc = rcv->sb_cc;
			sorwakeup(so2);
			m = NULL;
#undef snd
#undef rcv
			break;

		default:
			panic("uipc 4");
		}
		/* we need to undo unp_internalize in case of errors */
		if (control && error)
			unp_dispose(control);
		break;

	case PRU_ABORT:
		unp_drop(unp, ECONNABORTED);
		break;

	case PRU_SENSE: {
		/*
		 * fstat(2): here m is actually the caller's struct stat,
		 * not an mbuf, so return directly without m_freem().
		 */
		struct stat *sb = (struct stat *)m;

		sb->st_blksize = so->so_snd.sb_hiwat;
		sb->st_dev = NODEV;
		/* hand out a fake inode number on first fstat() */
		if (unp->unp_ino == 0)
			unp->unp_ino = unp_ino++;
		sb->st_atim.tv_sec =
		    sb->st_mtim.tv_sec =
		    sb->st_ctim.tv_sec = unp->unp_ctime.tv_sec;
		sb->st_atim.tv_nsec =
		    sb->st_mtim.tv_nsec =
		    sb->st_ctim.tv_nsec = unp->unp_ctime.tv_nsec;
		sb->st_ino = unp->unp_ino;
		return (0);
	}

	case PRU_RCVOOB:
		return (EOPNOTSUPP);

	case PRU_SENDOOB:
		error = EOPNOTSUPP;
		break;

	case PRU_SOCKADDR:
		uipc_setaddr(unp, nam);
		break;

	case PRU_PEERADDR:
		uipc_setaddr(unp->unp_conn, nam);
		break;

	case PRU_SLOWTIMO:
		break;

	default:
		panic("piusrreq");
	}
release:
	m_freem(control);
	m_freem(m);
	return (error);
}
328 
/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 */
#define	PIPSIZ	4096
u_long	unpst_sendspace = PIPSIZ;	/* stream/seqpacket send buffer */
u_long	unpst_recvspace = PIPSIZ;	/* stream/seqpacket receive buffer */
u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_long	unpdg_recvspace = 4*1024;	/* datagram receive buffer */

int	unp_rights;			/* file descriptors in flight */
344 
345 int
346 uipc_attach(struct socket *so, int proto)
347 {
348 	struct unpcb *unp;
349 	int error;
350 
351 	if (so->so_pcb)
352 		return EISCONN;
353 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
354 		switch (so->so_type) {
355 
356 		case SOCK_STREAM:
357 		case SOCK_SEQPACKET:
358 			error = soreserve(so, unpst_sendspace, unpst_recvspace);
359 			break;
360 
361 		case SOCK_DGRAM:
362 			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
363 			break;
364 
365 		default:
366 			panic("unp_attach");
367 		}
368 		if (error)
369 			return (error);
370 	}
371 	unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT|M_ZERO);
372 	if (unp == NULL)
373 		return (ENOBUFS);
374 	unp->unp_socket = so;
375 	so->so_pcb = unp;
376 	getnanotime(&unp->unp_ctime);
377 	LIST_INSERT_HEAD(&unp_head, unp, unp_link);
378 	return (0);
379 }
380 
/*
 * Tear down a unpcb when its socket is closed: unlink it from the
 * global list, drop the bound vnode (if any), disconnect from the
 * peer and from any datagram senders still pointing at us, then
 * free the pcb.
 */
void
unp_detach(struct unpcb *unp)
{
	struct vnode *vp;

	LIST_REMOVE(unp, unp_link);
	if (unp->unp_vnode) {
		/* break the vnode<->socket binding before vrele() */
		unp->unp_vnode->v_socket = NULL;
		vp = unp->unp_vnode;
		unp->unp_vnode = NULL;
		vrele(vp);
	}
	if (unp->unp_conn)
		unp_disconnect(unp);
	/* reset every datagram socket still connected to us */
	while (!SLIST_EMPTY(&unp->unp_refs))
		unp_drop(SLIST_FIRST(&unp->unp_refs), ECONNRESET);
	soisdisconnected(unp->unp_socket);
	unp->unp_socket->so_pcb = NULL;
	m_freem(unp->unp_addr);
	free(unp, M_PCB, sizeof *unp);
	/* this close may have freed fds that were in flight: collect */
	if (unp_rights)
		task_add(systq, &unp_gc_task);
}
404 
/*
 * bind(2) for AF_UNIX: create a VSOCK vnode at the path in nam and
 * bind it to this socket.  Fails with EADDRINUSE if the path already
 * exists.  On success the socket holds the (unlocked) vnode and the
 * binder's credentials are recorded for peer-credential queries.
 */
int
unp_bind(struct unpcb *unp, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct mbuf *nam2;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	size_t pathlen;

	if (unp->unp_vnode != NULL)	/* already bound */
		return (EINVAL);
	if ((error = unp_nam2sun(nam, &soun, &pathlen)))
		return (error);

	/* keep a private, full-size copy of the address */
	nam2 = m_getclr(M_WAITOK, MT_SONAME);
	nam2->m_len = sizeof(struct sockaddr_un);
	memcpy(mtod(nam2, struct sockaddr_un *), soun,
	    offsetof(struct sockaddr_un, sun_path) + pathlen);
	/* No need to NUL terminate: m_getclr() returns zero'd mbufs. */

	soun = mtod(nam2, struct sockaddr_un *);

	/* Fixup sun_len to keep it in sync with m_len. */
	soun->sun_len = nam2->m_len;

	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;
/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	if ((error = namei(&nd)) != 0) {
		m_freem(nam2);
		return (error);
	}
	vp = nd.ni_vp;
	if (vp != NULL) {
		/* path exists: undo the pending create and refuse */
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		m_freem(nam2);
		return (EADDRINUSE);
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	if (error) {
		m_freem(nam2);
		return (error);
	}
	unp->unp_addr = nam2;
	vp = nd.ni_vp;
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	/* record binder's identity for peer-credential lookups */
	unp->unp_connid.uid = p->p_ucred->cr_uid;
	unp->unp_connid.gid = p->p_ucred->cr_gid;
	unp->unp_connid.pid = p->p_p->ps_pid;
	unp->unp_flags |= UNP_FEIDSBIND;
	VOP_UNLOCK(vp, p);
	return (0);
}
470 
/*
 * connect(2) for AF_UNIX: look up the path in nam, verify it is a
 * socket vnode we may write to, and connect to the socket bound
 * there.  For connection-oriented types a fresh server-side socket
 * is spawned from the listener with sonewconn() and credentials are
 * exchanged between the endpoints.
 */
int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	struct nameidata nd;
	int error;

	if ((error = unp_nam2sun(nam, &soun, NULL)))
		return (error);

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto bad;
	}
	/* connecting requires write access to the vnode */
	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
		goto bad;
	so2 = vp->v_socket;
	if (so2 == NULL) {
		/* path exists but nothing is bound there anymore */
		error = ECONNREFUSED;
		goto bad;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto bad;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		/* stream/seqpacket: target must be listening */
		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0)) == 0) {
			error = ECONNREFUSED;
			goto bad;
		}
		unp = sotounpcb(so);
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		if (unp2->unp_addr)
			unp3->unp_addr =
			    m_copym(unp2->unp_addr, 0, M_COPYALL, M_NOWAIT);
		/* give the new child the connecting process's identity */
		unp3->unp_connid.uid = p->p_ucred->cr_uid;
		unp3->unp_connid.gid = p->p_ucred->cr_gid;
		unp3->unp_connid.pid = p->p_p->ps_pid;
		unp3->unp_flags |= UNP_FEIDS;
		/* connect to the spawned child, not the listener */
		so2 = so3;
		if (unp2->unp_flags & UNP_FEIDSBIND) {
			/* and take the listener's bind-time identity */
			unp->unp_connid = unp2->unp_connid;
			unp->unp_flags |= UNP_FEIDS;
		}
	}
	error = unp_connect2(so, so2);
bad:
	vput(vp);
	return (error);
}
531 
532 int
533 unp_connect2(struct socket *so, struct socket *so2)
534 {
535 	struct unpcb *unp = sotounpcb(so);
536 	struct unpcb *unp2;
537 
538 	if (so2->so_type != so->so_type)
539 		return (EPROTOTYPE);
540 	unp2 = sotounpcb(so2);
541 	unp->unp_conn = unp2;
542 	switch (so->so_type) {
543 
544 	case SOCK_DGRAM:
545 		SLIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_nextref);
546 		soisconnected(so);
547 		break;
548 
549 	case SOCK_STREAM:
550 	case SOCK_SEQPACKET:
551 		unp2->unp_conn = unp;
552 		soisconnected(so);
553 		soisconnected(so2);
554 		break;
555 
556 	default:
557 		panic("unp_connect2");
558 	}
559 	return (0);
560 }
561 
/*
 * Break the association with the connected peer, if any.  For
 * stream/seqpacket sockets both endpoints are marked disconnected
 * and their mirrored send-buffer accounting is reset.
 */
void
unp_disconnect(struct unpcb *unp)
{
	struct unpcb *unp2 = unp->unp_conn;

	if (unp2 == NULL)
		return;
	unp->unp_conn = NULL;
	switch (unp->unp_socket->so_type) {

	case SOCK_DGRAM:
		/* remove ourselves from the peer's list of senders */
		SLIST_REMOVE(&unp2->unp_refs, unp, unpcb, unp_nextref);
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		/* clear the counters mirrored from the peer (see PRU_RCVD) */
		unp->unp_socket->so_snd.sb_mbcnt = 0;
		unp->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp->unp_socket);
		unp2->unp_conn = NULL;
		unp2->unp_socket->so_snd.sb_mbcnt = 0;
		unp2->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp2->unp_socket);
		break;
	}
}
589 
590 void
591 unp_shutdown(struct unpcb *unp)
592 {
593 	struct socket *so;
594 
595 	switch (unp->unp_socket->so_type) {
596 	case SOCK_STREAM:
597 	case SOCK_SEQPACKET:
598 		if (unp->unp_conn && (so = unp->unp_conn->unp_socket))
599 			socantrcvmore(so);
600 		break;
601 	default:
602 		break;
603 	}
604 }
605 
/*
 * Abort a connection: record the error on the socket and disconnect.
 * If the socket still sits on a listen queue (was never accepted),
 * no process will ever close it, so free the socket and pcb here.
 */
void
unp_drop(struct unpcb *unp, int errno)
{
	struct socket *so = unp->unp_socket;

	so->so_error = errno;
	unp_disconnect(unp);
	if (so->so_head) {
		/* detach the pcb before sofree(), then free it ourselves */
		so->so_pcb = NULL;
		sofree(so);
		m_freem(unp->unp_addr);
		free(unp, M_PCB, sizeof *unp);
	}
}
620 
#ifdef notdef
/* Disabled stub: never compiled ("notdef" is not defined). */
unp_drain(void)
{

}
#endif
627 
628 extern	struct domain unixdomain;
629 
630 static struct unpcb *
631 fptounp(struct file *fp)
632 {
633 	struct socket *so;
634 
635 	if (fp->f_type != DTYPE_SOCKET)
636 		return (NULL);
637 	if ((so = fp->f_data) == NULL)
638 		return (NULL);
639 	if (so->so_proto->pr_domain != &unixdomain)
640 		return (NULL);
641 	return (sotounpcb(so));
642 }
643 
/*
 * Receive side of fd passing: turn the struct fdpass records in an
 * SCM_RIGHTS control message (created by unp_internalize()) back
 * into file descriptor numbers in the receiving process, shrinking
 * the message in place.  On error all passed files are discarded
 * via unp_discard() so the GC task can close them.
 */
int
unp_externalize(struct mbuf *rights, socklen_t controllen, int flags)
{
	struct proc *p = curproc;		/* XXX */
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	int i, *fdp = NULL;
	struct fdpass *rp;
	struct file *fp;
	int nfds, error = 0;

	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
	    sizeof(struct fdpass);
	if (controllen < CMSG_ALIGN(sizeof(struct cmsghdr)))
		controllen = 0;
	else
		controllen -= CMSG_ALIGN(sizeof(struct cmsghdr));
	/* receiver's control buffer too small for nfds ints: discard all */
	if (nfds > controllen / sizeof(int)) {
		error = EMSGSIZE;
		goto restart;
	}

	/* Make sure the recipient should be able to see the descriptors.. */
	rp = (struct fdpass *)CMSG_DATA(cm);
	for (i = 0; i < nfds; i++) {
		fp = rp->fp;
		rp++;
		error = pledge_recvfd(p, fp);
		if (error)
			break;

		/*
		 * No to block devices.  If passing a directory,
		 * make sure that it is underneath the root.
		 */
		if (p->p_fd->fd_rdir != NULL && fp->f_type == DTYPE_VNODE) {
			struct vnode *vp = (struct vnode *)fp->f_data;

			if (vp->v_type == VBLK ||
			    (vp->v_type == VDIR &&
			    !vn_isunder(vp, p->p_fd->fd_rdir, p))) {
				error = EPERM;
				break;
			}
		}
	}

	fdp = mallocarray(nfds, sizeof(int), M_TEMP, M_WAITOK);

restart:
	fdplock(p->p_fd);
	if (error != 0) {
		/* a check above failed: drop every passed file */
		if (nfds > 0) {
			rp = ((struct fdpass *)CMSG_DATA(cm));
			unp_discard(rp, nfds);
		}
		goto out;
	}

	/*
	 * First loop -- allocate file descriptor table slots for the
	 * new descriptors.
	 */
	rp = ((struct fdpass *)CMSG_DATA(cm));
	for (i = 0; i < nfds; i++) {
		if ((error = fdalloc(p, 0, &fdp[i])) != 0) {
			/*
			 * Back out what we've done so far.
			 */
			for (--i; i >= 0; i--)
				fdremove(p->p_fd, fdp[i]);

			if (error == ENOSPC) {
				fdexpand(p);
				error = 0;
			} else {
				/*
				 * This is the error that has historically
				 * been returned, and some callers may
				 * expect it.
				 */
				error = EMSGSIZE;
			}
			/*
			 * Retry after fdexpand(); if error is still set
			 * the restart path above discards the files.
			 */
			fdpunlock(p->p_fd);
			goto restart;
		}

		/*
		 * Make the slot reference the descriptor so that
		 * fdalloc() works properly.. We finalize it all
		 * in the loop below.
		 */
		p->p_fd->fd_ofiles[fdp[i]] = rp->fp;
		p->p_fd->fd_ofileflags[fdp[i]] = (rp->flags & UF_PLEDGED);
		rp++;

		if (flags & MSG_CMSG_CLOEXEC)
			p->p_fd->fd_ofileflags[fdp[i]] |= UF_EXCLOSE;
	}

	/*
	 * Now that adding them has succeeded, update all of the
	 * descriptor passing state.
	 */
	rp = (struct fdpass *)CMSG_DATA(cm);
	for (i = 0; i < nfds; i++) {
		struct unpcb *unp;

		fp = rp->fp;
		rp++;
		if ((unp = fptounp(fp)) != NULL)
			unp->unp_msgcount--;
		unp_rights--;
	}

	/*
	 * Copy temporary array to message and adjust length, in case of
	 * transition from large struct file pointers to ints.
	 */
	memcpy(CMSG_DATA(cm), fdp, nfds * sizeof(int));
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
	rights->m_len = CMSG_LEN(nfds * sizeof(int));
 out:
	fdpunlock(p->p_fd);
	if (fdp)
		free(fdp, M_TEMP, nfds * sizeof(int));
	return (error);
}
771 
/*
 * Send side of fd passing: replace the int file descriptors in an
 * SCM_RIGHTS control message with struct fdpass records referencing
 * the open files, taking a reference on each.  unp_externalize()
 * performs the inverse on the receive side.  The mbuf is grown to a
 * cluster if needed, since fdpass records are bigger than ints.
 */
int
unp_internalize(struct mbuf *control, struct proc *p)
{
	struct filedesc *fdp = p->p_fd;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct fdpass *rp;
	struct file *fp;
	struct unpcb *unp;
	int i, error;
	int nfds, *ip, fd, neededspace;

	/*
	 * Check for two potential msg_controllen values because
	 * IETF stuck their nose in a place it does not belong.
	 */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    !(cm->cmsg_len == control->m_len ||
	    control->m_len == CMSG_ALIGN(cm->cmsg_len)))
		return (EINVAL);
	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);

	/* global cap on the number of in-flight descriptors */
	if (unp_rights + nfds > maxfiles / 10)
		return (EMFILE);

	/* Make sure we have room for the struct file pointers */
morespace:
	neededspace = CMSG_SPACE(nfds * sizeof(struct fdpass)) -
	    control->m_len;
	if (neededspace > M_TRAILINGSPACE(control)) {
		char *tmp;
		/* if we already have a cluster, the message is just too big */
		if (control->m_flags & M_EXT)
			return (E2BIG);

		/* copy cmsg data temporarily out of the mbuf */
		tmp = malloc(control->m_len, M_TEMP, M_WAITOK);
		memcpy(tmp, mtod(control, caddr_t), control->m_len);

		/* allocate a cluster and try again */
		MCLGET(control, M_WAIT);
		if ((control->m_flags & M_EXT) == 0) {
			free(tmp, M_TEMP, control->m_len);
			return (ENOBUFS);       /* allocation failed */
		}

		/* copy the data back into the cluster */
		cm = mtod(control, struct cmsghdr *);
		memcpy(cm, tmp, control->m_len);
		free(tmp, M_TEMP, control->m_len);
		goto morespace;
	}

	/* adjust message & mbuf to note amount of space actually used. */
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct fdpass));
	control->m_len = CMSG_SPACE(nfds * sizeof(struct fdpass));

	/*
	 * Convert in place, back to front: each fdpass is larger than
	 * the int it replaces, so going forward would clobber fds not
	 * yet converted.
	 */
	ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
	rp = ((struct fdpass *)CMSG_DATA(cm)) + nfds - 1;
	for (i = 0; i < nfds; i++) {
		memcpy(&fd, ip, sizeof fd);
		ip--;
		if ((fp = fd_getfile(fdp, fd)) == NULL) {
			error = EBADF;
			goto fail;
		}
		/* keep the reference count from overflowing */
		if (fp->f_count == LONG_MAX-2) {
			error = EDEADLK;
			goto fail;
		}
		error = pledge_sendfd(p, fp);
		if (error)
			goto fail;

		/* kqueue descriptors cannot be copied */
		if (fp->f_type == DTYPE_KQUEUE) {
			error = EINVAL;
			goto fail;
		}
		rp->fp = fp;
		rp->flags = fdp->fd_ofileflags[fd] & UF_PLEDGED;
		rp--;
		fp->f_count++;
		if ((unp = fptounp(fp)) != NULL) {
			unp->unp_file = fp;
			unp->unp_msgcount++;
		}
		unp_rights++;
	}
	return (0);
fail:
	/* Back out what we just did. */
	for ( ; i > 0; i--) {
		rp++;
		fp = rp->fp;
		fp->f_count--;
		if ((unp = fptounp(fp)) != NULL)
			unp->unp_msgcount--;
		unp_rights--;
	}

	return (error);
}
874 
/* unp_gc() state: outstanding deferred marks, and a reentrancy guard */
int	unp_defer, unp_gcing;
876 
/*
 * Garbage collector for fd-passing cycles.  First closes everything
 * on the deferred list, then runs a mark-and-sweep over all unix
 * sockets: sockets whose only file references come from in-flight
 * SCM_RIGHTS messages are unreachable, so the file references queued
 * in their receive buffers are discarded.  Runs from unp_gc_task;
 * unp_gcing prevents reentry.
 */
void
unp_gc(void *arg __unused)
{
	struct unp_deferral *defer;
	struct file *fp;
	struct socket *so;
	struct unpcb *unp;
	int nunref, i;

	if (unp_gcing)
		return;
	unp_gcing = 1;

	/* close any fds on the deferred list */
	while ((defer = SLIST_FIRST(&unp_deferred)) != NULL) {
		SLIST_REMOVE_HEAD(&unp_deferred, ud_link);
		for (i = 0; i < defer->ud_n; i++) {
			fp = defer->ud_fp[i].fp;
			if (fp == NULL)
				continue;
			FREF(fp);
			if ((unp = fptounp(fp)) != NULL)
				unp->unp_msgcount--;
			unp_rights--;
			(void) closef(fp, NULL);
		}
		free(defer, M_TEMP, sizeof(*defer) +
		    sizeof(struct fdpass) * defer->ud_n);
	}

	/* clear all GC marks, then iterate until no deferrals remain */
	unp_defer = 0;
	LIST_FOREACH(unp, &unp_head, unp_link)
		unp->unp_flags &= ~(UNP_GCMARK | UNP_GCDEFER | UNP_GCDEAD);
	do {
		nunref = 0;
		LIST_FOREACH(unp, &unp_head, unp_link) {
			if (unp->unp_flags & UNP_GCDEFER) {
				/*
				 * This socket is referenced by another
				 * socket which is known to be live,
				 * so it's certainly live.
				 */
				unp->unp_flags &= ~UNP_GCDEFER;
				unp_defer--;
			} else if (unp->unp_flags & UNP_GCMARK) {
				/* marked as live in previous pass */
				continue;
			} else if ((fp = unp->unp_file) == NULL) {
				/* not being passed, so can't be in loop */
			} else if (fp->f_count == 0) {
				/*
				 * Already being closed, let normal close
				 * path take its course
				 */
			} else {
				/*
				 * Unreferenced by other sockets so far,
				 * so if all the references (f_count) are
				 * from passing (unp_msgcount) then this
				 * socket is prospectively dead
				 */
				if (fp->f_count == unp->unp_msgcount) {
					nunref++;
					unp->unp_flags |= UNP_GCDEAD;
					continue;
				}
			}

			/*
			 * This is the first time we've seen this socket on
			 * the mark pass and known it has a live reference,
			 * so mark it, then scan its receive buffer for
			 * sockets and note them as deferred (== referenced,
			 * but not yet marked).
			 */
			unp->unp_flags |= UNP_GCMARK;

			so = unp->unp_socket;
#ifdef notdef
			if (so->so_rcv.sb_flags & SB_LOCK) {
				/*
				 * This is problematical; it's not clear
				 * we need to wait for the sockbuf to be
				 * unlocked (on a uniprocessor, at least),
				 * and it's also not clear what to do
				 * if sbwait returns an error due to receipt
				 * of a signal.  If sbwait does return
				 * an error, we'll go into an infinite
				 * loop.  Delete all of this for now.
				 */
				(void) sbwait(&so->so_rcv);
				goto restart;
			}
#endif
			unp_scan(so->so_rcv.sb_mb, unp_mark);
		}
	} while (unp_defer);

	/*
	 * If there are any unreferenced sockets, then for each dispose
	 * of files in its receive buffer and then close it.
	 */
	if (nunref) {
		LIST_FOREACH(unp, &unp_head, unp_link) {
			if (unp->unp_flags & UNP_GCDEAD)
				unp_scan(unp->unp_socket->so_rcv.sb_mb,
				    unp_discard);
		}
	}
	unp_gcing = 0;
}
988 
989 void
990 unp_dispose(struct mbuf *m)
991 {
992 
993 	if (m)
994 		unp_scan(m, unp_discard);
995 }
996 
/*
 * Walk a chain of mbuf records looking for SCM_RIGHTS control
 * messages, applying op (unp_mark or unp_discard) to the fdpass
 * array of each one found.
 */
void
unp_scan(struct mbuf *m0, void (*op)(struct fdpass *, int))
{
	struct mbuf *m;
	struct fdpass *rp;
	struct cmsghdr *cm;
	int qfds;

	/* outer loop iterates over records, inner over each record's chain */
	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type == MT_CONTROL &&
			    m->m_len >= sizeof(*cm)) {
				cm = mtod(m, struct cmsghdr *);
				if (cm->cmsg_level != SOL_SOCKET ||
				    cm->cmsg_type != SCM_RIGHTS)
					continue;
				qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm))
				    / sizeof(struct fdpass);
				if (qfds > 0) {
					rp = (struct fdpass *)CMSG_DATA(cm);
					op(rp, qfds);
				}
				break;		/* XXX, but saves time */
			}
		}
		m0 = m0->m_nextpkt;
	}
}
1025 
1026 void
1027 unp_mark(struct fdpass *rp, int nfds)
1028 {
1029 	struct unpcb *unp;
1030 	int i;
1031 
1032 	for (i = 0; i < nfds; i++) {
1033 		if (rp[i].fp == NULL)
1034 			continue;
1035 
1036 		unp = fptounp(rp[i].fp);
1037 		if (unp == NULL)
1038 			continue;
1039 
1040 		if (unp->unp_flags & (UNP_GCMARK|UNP_GCDEFER))
1041 			continue;
1042 
1043 		unp_defer++;
1044 		unp->unp_flags |= UNP_GCDEFER;
1045 		unp->unp_flags &= ~UNP_GCDEAD;
1046 	}
1047 }
1048 
1049 void
1050 unp_discard(struct fdpass *rp, int nfds)
1051 {
1052 	struct unp_deferral *defer;
1053 
1054 	/* copy the file pointers to a deferral structure */
1055 	defer = malloc(sizeof(*defer) + sizeof(*rp) * nfds, M_TEMP, M_WAITOK);
1056 	defer->ud_n = nfds;
1057 	memcpy(&defer->ud_fp[0], rp, sizeof(*rp) * nfds);
1058 	memset(rp, 0, sizeof(*rp) * nfds);
1059 	SLIST_INSERT_HEAD(&unp_deferred, defer, ud_link);
1060 
1061 	task_add(systq, &unp_gc_task);
1062 }
1063 
/*
 * Validate the sockaddr in nam (from bind(2)/connect(2)) and return
 * it as a sockaddr_un via *sun.  Checks the address family and
 * length consistency, and guarantees sun_path is NUL terminated,
 * appending a NUL inside the mbuf when there is room.  The path
 * length (excluding the NUL) is stored in *pathlen if non-NULL.
 */
int
unp_nam2sun(struct mbuf *nam, struct sockaddr_un **sun, size_t *pathlen)
{
	struct sockaddr *sa = mtod(nam, struct sockaddr *);
	size_t size, len;

	if (nam->m_len < offsetof(struct sockaddr, sa_data))
		return EINVAL;
	if (sa->sa_family != AF_UNIX)
		return EAFNOSUPPORT;
	if (sa->sa_len != nam->m_len)
		return EINVAL;
	if (sa->sa_len > sizeof(struct sockaddr_un))
		return EINVAL;
	*sun = (struct sockaddr_un *)sa;

	/* ensure that sun_path is NUL terminated and fits */
	size = (*sun)->sun_len - offsetof(struct sockaddr_un, sun_path);
	len = strnlen((*sun)->sun_path, size);
	if (len == sizeof((*sun)->sun_path))
		return EINVAL;
	if (len == size) {
		/* no NUL within the given length; append one */
		if (M_TRAILINGSPACE(nam) == 0)
			return EINVAL;
		nam->m_len++;
		(*sun)->sun_len++;
		(*sun)->sun_path[len] = '\0';
	}
	if (pathlen != NULL)
		*pathlen = len;

	return 0;
}
1097