xref: /openbsd-src/sys/kern/uipc_usrreq.c (revision 99fd087599a8791921855f21bd7e36130f39aadc)
1 /*	$OpenBSD: uipc_usrreq.c,v 1.142 2019/07/16 21:41:37 bluhm Exp $	*/
2 /*	$NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1989, 1991, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
33  */
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/proc.h>
38 #include <sys/filedesc.h>
39 #include <sys/domain.h>
40 #include <sys/protosw.h>
41 #include <sys/queue.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/unpcb.h>
45 #include <sys/un.h>
46 #include <sys/namei.h>
47 #include <sys/vnode.h>
48 #include <sys/file.h>
49 #include <sys/stat.h>
50 #include <sys/mbuf.h>
51 #include <sys/task.h>
52 #include <sys/pledge.h>
53 #include <sys/pool.h>
54 
void	uipc_setaddr(const struct unpcb *, struct mbuf *);

/* list of all UNIX domain sockets, for unp_gc() */
LIST_HEAD(unp_head, unpcb) unp_head = LIST_HEAD_INITIALIZER(unp_head);

/*
 * Stack of sets of files that were passed over a socket but were
 * not received and need to be closed.
 */
struct	unp_deferral {
	SLIST_ENTRY(unp_deferral)	ud_link;	/* on unp_deferred list */
	int	ud_n;			/* number of entries in ud_fp[] */
	/* followed by ud_n struct fdpass */
	struct fdpass ud_fp[];
};

void	unp_discard(struct fdpass *, int);
void	unp_mark(struct fdpass *, int);
void	unp_scan(struct mbuf *, void (*)(struct fdpass *, int));
int	unp_nam2sun(struct mbuf *, struct sockaddr_un **, size_t *);

struct pool unpcb_pool;		/* backing store for struct unpcb */
/* list of sets of files that were sent over sockets that are now closed */
SLIST_HEAD(,unp_deferral) unp_deferred = SLIST_HEAD_INITIALIZER(unp_deferred);

/* deferred-close / fd garbage collection work, run from the system taskq */
struct task unp_gc_task = TASK_INITIALIZER(unp_gc, NULL);


/*
 * Unix communications domain.
 *
 * TODO:
 *	RDM
 *	rethink name space problems
 *	need a proper out-of-band
 */
struct	sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };
ino_t	unp_ino;			/* prototype for fake inode numbers */
93 
/*
 * One-time initialization of the Unix-domain PCB pool; called when
 * the protocol is set up.
 */
void
unp_init(void)
{
	pool_init(&unpcb_pool, sizeof(struct unpcb), 0,
	    IPL_NONE, 0, "unpcb", NULL);
}
100 
101 void
102 uipc_setaddr(const struct unpcb *unp, struct mbuf *nam)
103 {
104 	if (unp != NULL && unp->unp_addr != NULL) {
105 		nam->m_len = unp->unp_addr->m_len;
106 		memcpy(mtod(nam, caddr_t), mtod(unp->unp_addr, caddr_t),
107 		    nam->m_len);
108 	} else {
109 		nam->m_len = sizeof(sun_noname);
110 		memcpy(mtod(nam, struct sockaddr *), &sun_noname,
111 		    nam->m_len);
112 	}
113 }
114 
/*
 * Protocol user-request handler for Unix-domain sockets: dispatches
 * the PRU_* request `req' issued on socket `so'.  `m' carries data to
 * send (or, for PRU_SENSE, points at a struct stat to fill in), `nam'
 * an address or a second socket, `control' optional SCM_RIGHTS control
 * data, and `p' the calling process.  Returns 0 or an errno value.
 */
int
uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control, struct proc *p)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;
	struct socket *so2;
	int error = 0;

	if (req == PRU_CONTROL)
		return (EOPNOTSUPP);
	/* control data (fd passing) is only meaningful for PRU_SEND */
	if (req != PRU_SEND && control && control->m_len) {
		error = EOPNOTSUPP;
		goto release;
	}
	if (unp == NULL) {
		error = EINVAL;
		goto release;
	}

	NET_ASSERT_UNLOCKED();

	switch (req) {

	case PRU_BIND:
		error = unp_bind(unp, nam, p);
		break;

	case PRU_LISTEN:
		/* may only listen on a socket bound to a filesystem path */
		if (unp->unp_vnode == NULL)
			error = EINVAL;
		break;

	case PRU_CONNECT:
		error = unp_connect(so, nam, p);
		break;

	case PRU_CONNECT2:
		/* socketpair(2): `nam' is really the second socket */
		error = unp_connect2(so, (struct socket *)nam);
		if (!error) {
			/* stamp the caller's credentials on both endpoints */
			unp->unp_connid.uid = p->p_ucred->cr_uid;
			unp->unp_connid.gid = p->p_ucred->cr_gid;
			unp->unp_connid.pid = p->p_p->ps_pid;
			unp->unp_flags |= UNP_FEIDS;
			unp2 = sotounpcb((struct socket *)nam);
			unp2->unp_connid.uid = p->p_ucred->cr_uid;
			unp2->unp_connid.gid = p->p_ucred->cr_gid;
			unp2->unp_connid.pid = p->p_p->ps_pid;
			unp2->unp_flags |= UNP_FEIDS;
		}
		break;

	case PRU_DISCONNECT:
		unp_disconnect(unp);
		break;

	case PRU_ACCEPT:
		/*
		 * Pass back name of connected socket,
		 * if it was bound and we are still connected
		 * (our peer may have closed already!).
		 */
		uipc_setaddr(unp->unp_conn, nam);
		break;

	case PRU_SHUTDOWN:
		socantsendmore(so);
		unp_shutdown(unp);
		break;

	case PRU_RCVD:
		switch (so->so_type) {

		case SOCK_DGRAM:
			/* datagram sockets never generate PRU_RCVD */
			panic("uipc 1");
			/*NOTREACHED*/

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			if (unp->unp_conn == NULL)
				break;
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Adjust backpressure on sender
			 * and wakeup any waiting to write.
			 */
			so2->so_snd.sb_mbcnt = so->so_rcv.sb_mbcnt;
			so2->so_snd.sb_cc = so->so_rcv.sb_cc;
			sowwakeup(so2);
			break;

		default:
			panic("uipc 2");
		}
		break;

	case PRU_SEND:
		/* convert fd numbers in `control' into file pointers */
		if (control && (error = unp_internalize(control, p)))
			break;
		switch (so->so_type) {

		case SOCK_DGRAM: {
			struct sockaddr *from;

			if (nam) {
				/* sendto(2) with an explicit destination */
				if (unp->unp_conn) {
					error = EISCONN;
					break;
				}
				error = unp_connect(so, nam, p);
				if (error)
					break;
			} else {
				if (unp->unp_conn == NULL) {
					error = ENOTCONN;
					break;
				}
			}
			so2 = unp->unp_conn->unp_socket;
			if (unp->unp_addr)
				from = mtod(unp->unp_addr, struct sockaddr *);
			else
				from = &sun_noname;
			/* on success the receiver owns `m' and `control' */
			if (sbappendaddr(so2, &so2->so_rcv, from, m, control)) {
				sorwakeup(so2);
				m = NULL;
				control = NULL;
			} else
				error = ENOBUFS;
			/* undo the temporary connect done for sendto(2) */
			if (nam)
				unp_disconnect(unp);
			break;
		}

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			if (so->so_state & SS_CANTSENDMORE) {
				error = EPIPE;
				break;
			}
			if (unp->unp_conn == NULL) {
				error = ENOTCONN;
				break;
			}
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Send to paired receive port, and then raise
			 * send buffer counts to maintain backpressure.
			 * Wake up readers.
			 */
			if (control) {
				if (sbappendcontrol(so2, &so2->so_rcv, m,
				    control)) {
					control = NULL;
				} else {
					error = ENOBUFS;
					break;
				}
			} else if (so->so_type == SOCK_SEQPACKET)
				sbappendrecord(so2, &so2->so_rcv, m);
			else
				sbappend(so2, &so2->so_rcv, m);
			so->so_snd.sb_mbcnt = so2->so_rcv.sb_mbcnt;
			so->so_snd.sb_cc = so2->so_rcv.sb_cc;
			if (so2->so_rcv.sb_cc > 0)
				sorwakeup(so2);
			m = NULL;
			break;

		default:
			panic("uipc 4");
		}
		/* we need to undo unp_internalize in case of errors */
		if (control && error)
			unp_dispose(control);
		break;

	case PRU_ABORT:
		unp_drop(unp, ECONNABORTED);
		break;

	case PRU_SENSE: {
		/* fstat(2): `m' is not an mbuf here but a stat buffer */
		struct stat *sb = (struct stat *)m;

		sb->st_blksize = so->so_snd.sb_hiwat;
		sb->st_dev = NODEV;
		/* lazily assign a fake inode number on first fstat() */
		if (unp->unp_ino == 0)
			unp->unp_ino = unp_ino++;
		sb->st_atim.tv_sec =
		    sb->st_mtim.tv_sec =
		    sb->st_ctim.tv_sec = unp->unp_ctime.tv_sec;
		sb->st_atim.tv_nsec =
		    sb->st_mtim.tv_nsec =
		    sb->st_ctim.tv_nsec = unp->unp_ctime.tv_nsec;
		sb->st_ino = unp->unp_ino;
		break;
	}

	case PRU_RCVOOB:
	case PRU_SENDOOB:
		/* unix sockets have no out-of-band data */
		error = EOPNOTSUPP;
		break;

	case PRU_SOCKADDR:
		uipc_setaddr(unp, nam);
		break;

	case PRU_PEERADDR:
		uipc_setaddr(unp->unp_conn, nam);
		break;

	case PRU_SLOWTIMO:
		break;

	default:
		panic("uipc_usrreq");
	}
release:
	/*
	 * For PRU_RCVD/PRU_RCVOOB `m' was never ours to consume, and
	 * for PRU_SENSE it is a struct stat, so don't free it then.
	 */
	if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) {
		m_freem(control);
		m_freem(m);
	}
	return (error);
}
339 
/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 */
#define	PIPSIZ	4096
u_long	unpst_sendspace = PIPSIZ;	/* stream send buffer size */
u_long	unpst_recvspace = PIPSIZ;	/* stream receive buffer size */
u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_long	unpdg_recvspace = 4*1024;	/* datagram receive buffer size */

int	unp_rights;			/* file descriptors in flight */
355 
/*
 * PRU_ATTACH: allocate and initialize a new Unix-domain PCB for
 * socket `so', reserving default buffer space based on the socket
 * type.  `proto' is unused.  Returns 0 or an errno value.
 */
int
uipc_attach(struct socket *so, int proto)
{
	struct unpcb *unp;
	int error;

	if (so->so_pcb)
		return EISCONN;
	/* only reserve buffer space if none has been set up yet */
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			return (error);
	}
	unp = pool_get(&unpcb_pool, PR_NOWAIT|PR_ZERO);
	if (unp == NULL)
		return (ENOBUFS);
	unp->unp_socket = so;
	so->so_pcb = unp;
	/* creation time is reported by fstat(2) via PRU_SENSE */
	getnanotime(&unp->unp_ctime);
	LIST_INSERT_HEAD(&unp_head, unp, unp_link);
	return (0);
}
391 
392 int
393 uipc_detach(struct socket *so)
394 {
395 	struct unpcb *unp = sotounpcb(so);
396 
397 	if (unp == NULL)
398 		return (EINVAL);
399 
400 	NET_ASSERT_UNLOCKED();
401 
402 	unp_detach(unp);
403 
404 	return (0);
405 }
406 
/*
 * Tear down a Unix-domain PCB: unlink it from the global list, drop
 * any file-system vnode binding, disconnect from the peer, reset all
 * datagram senders still referencing us, and free the PCB.  Schedules
 * the garbage collector if descriptors are still in flight.
 */
void
unp_detach(struct unpcb *unp)
{
	struct vnode *vp;

	LIST_REMOVE(unp, unp_link);
	if (unp->unp_vnode) {
		/* break the vnode<->socket binding before releasing */
		unp->unp_vnode->v_socket = NULL;
		vp = unp->unp_vnode;
		unp->unp_vnode = NULL;
		vrele(vp);
	}
	if (unp->unp_conn)
		unp_disconnect(unp);
	/* reset datagram peers that are still connected to us */
	while (!SLIST_EMPTY(&unp->unp_refs))
		unp_drop(SLIST_FIRST(&unp->unp_refs), ECONNRESET);
	soisdisconnected(unp->unp_socket);
	unp->unp_socket->so_pcb = NULL;
	m_freem(unp->unp_addr);
	pool_put(&unpcb_pool, unp);
	/* files may have been sent but never received; let unp_gc() reap */
	if (unp_rights)
		task_add(systq, &unp_gc_task);
}
430 
/*
 * PRU_BIND: bind the socket behind `unp' to the filesystem path held
 * in `nam' by creating a VSOCK vnode there.  A private, NUL-terminated
 * copy of the address is kept in unp_addr.  Returns 0 or an errno
 * value; EADDRINUSE when the path already exists.
 */
int
unp_bind(struct unpcb *unp, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct mbuf *nam2;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	size_t pathlen;

	/* already bound? */
	if (unp->unp_vnode != NULL)
		return (EINVAL);
	if ((error = unp_nam2sun(nam, &soun, &pathlen)))
		return (error);

	/* keep a private, fixed-size copy of the address */
	nam2 = m_getclr(M_WAITOK, MT_SONAME);
	nam2->m_len = sizeof(struct sockaddr_un);
	memcpy(mtod(nam2, struct sockaddr_un *), soun,
	    offsetof(struct sockaddr_un, sun_path) + pathlen);
	/* No need to NUL terminate: m_getclr() returns zero'd mbufs. */

	soun = mtod(nam2, struct sockaddr_un *);

	/* Fixup sun_len to keep it in sync with m_len. */
	soun->sun_len = nam2->m_len;

	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;
/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	if ((error = namei(&nd)) != 0) {
		m_freem(nam2);
		return (error);
	}
	vp = nd.ni_vp;
	if (vp != NULL) {
		/* path already exists; release everything namei held */
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		m_freem(nam2);
		return (EADDRINUSE);
	}
	/* create the socket node, honouring the process umask */
	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	vput(nd.ni_dvp);
	if (error) {
		m_freem(nam2);
		return (error);
	}
	unp->unp_addr = nam2;
	vp = nd.ni_vp;
	/* bind vnode and socket to each other */
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	/* remember the binder's credentials for getpeereid(3) */
	unp->unp_connid.uid = p->p_ucred->cr_uid;
	unp->unp_connid.gid = p->p_ucred->cr_gid;
	unp->unp_connid.pid = p->p_p->ps_pid;
	unp->unp_flags |= UNP_FEIDSBIND;
	VOP_UNLOCK(vp);
	return (0);
}
497 
/*
 * Connect socket `so' to the Unix-domain socket bound at the path in
 * `nam'.  For connection-oriented types a fresh server-side socket is
 * spawned with sonewconn() and credentials are exchanged; datagram
 * sockets are linked directly.  Returns 0 or an errno value.
 */
int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	struct nameidata nd;
	int error;

	if ((error = unp_nam2sun(nam, &soun, NULL)))
		return (error);

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto bad;
	}
	/* connecting requires write access to the socket node */
	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
		goto bad;
	so2 = vp->v_socket;
	if (so2 == NULL) {
		/* the path exists but no socket is bound to it anymore */
		error = ECONNREFUSED;
		goto bad;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto bad;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		/* stream/seqpacket: spawn the accepted-side socket */
		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0)) == 0) {
			error = ECONNREFUSED;
			goto bad;
		}
		unp = sotounpcb(so);
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		if (unp2->unp_addr)
			unp3->unp_addr =
			    m_copym(unp2->unp_addr, 0, M_COPYALL, M_NOWAIT);
		/* give the server side our credentials... */
		unp3->unp_connid.uid = p->p_ucred->cr_uid;
		unp3->unp_connid.gid = p->p_ucred->cr_gid;
		unp3->unp_connid.pid = p->p_p->ps_pid;
		unp3->unp_flags |= UNP_FEIDS;
		/* ...and connect to the spawned socket, not the listener */
		so2 = so3;
		/* take the listener's bind-time credentials if available */
		if (unp2->unp_flags & UNP_FEIDSBIND) {
			unp->unp_connid = unp2->unp_connid;
			unp->unp_flags |= UNP_FEIDS;
		}
	}
	error = unp_connect2(so, so2);
bad:
	vput(vp);
	return (error);
}
558 
559 int
560 unp_connect2(struct socket *so, struct socket *so2)
561 {
562 	struct unpcb *unp = sotounpcb(so);
563 	struct unpcb *unp2;
564 
565 	if (so2->so_type != so->so_type)
566 		return (EPROTOTYPE);
567 	unp2 = sotounpcb(so2);
568 	unp->unp_conn = unp2;
569 	switch (so->so_type) {
570 
571 	case SOCK_DGRAM:
572 		SLIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_nextref);
573 		soisconnected(so);
574 		break;
575 
576 	case SOCK_STREAM:
577 	case SOCK_SEQPACKET:
578 		unp2->unp_conn = unp;
579 		soisconnected(so);
580 		soisconnected(so2);
581 		break;
582 
583 	default:
584 		panic("unp_connect2");
585 	}
586 	return (0);
587 }
588 
/*
 * Break the connection between `unp' and its peer, if any.  Datagram
 * sockets are simply removed from the peer's reference list; for
 * stream/seqpacket sockets both ends are marked disconnected and
 * their mirrored send-buffer accounting is reset.
 */
void
unp_disconnect(struct unpcb *unp)
{
	struct unpcb *unp2 = unp->unp_conn;

	if (unp2 == NULL)
		return;
	unp->unp_conn = NULL;
	switch (unp->unp_socket->so_type) {

	case SOCK_DGRAM:
		SLIST_REMOVE(&unp2->unp_refs, unp, unpcb, unp_nextref);
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		/* reset the mirrored backpressure counters on both ends */
		unp->unp_socket->so_snd.sb_mbcnt = 0;
		unp->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp->unp_socket);
		unp2->unp_conn = NULL;
		unp2->unp_socket->so_snd.sb_mbcnt = 0;
		unp2->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp2->unp_socket);
		break;
	}
}
616 
617 void
618 unp_shutdown(struct unpcb *unp)
619 {
620 	struct socket *so;
621 
622 	switch (unp->unp_socket->so_type) {
623 	case SOCK_STREAM:
624 	case SOCK_SEQPACKET:
625 		if (unp->unp_conn && (so = unp->unp_conn->unp_socket))
626 			socantrcvmore(so);
627 		break;
628 	default:
629 		break;
630 	}
631 }
632 
/*
 * Abort the connection on `unp', reporting `errno' to the owning
 * socket.  If the socket is an embryonic connection still sitting on
 * a listen queue (so_head != NULL), it is torn down completely here
 * and the PCB freed.
 */
void
unp_drop(struct unpcb *unp, int errno)
{
	struct socket *so = unp->unp_socket;

	KERNEL_ASSERT_LOCKED();

	so->so_error = errno;
	unp_disconnect(unp);
	if (so->so_head) {
		so->so_pcb = NULL;
		/*
		 * As long as the KERNEL_LOCK() is the default lock for Unix
		 * sockets, do not release it.
		 */
		sofree(so, SL_NOUNLOCK);
		m_freem(unp->unp_addr);
		pool_put(&unpcb_pool, unp);
	}
}
653 
#ifdef notdef
/* Placeholder mbuf-drain hook; compiled out (notdef is never defined). */
unp_drain(void)
{

}
#endif
660 
661 extern	struct domain unixdomain;
662 
663 static struct unpcb *
664 fptounp(struct file *fp)
665 {
666 	struct socket *so;
667 
668 	if (fp->f_type != DTYPE_SOCKET)
669 		return (NULL);
670 	if ((so = fp->f_data) == NULL)
671 		return (NULL);
672 	if (so->so_proto->pr_domain != &unixdomain)
673 		return (NULL);
674 	return (sotounpcb(so));
675 }
676 
/*
 * Turn the struct fdpass entries of a received SCM_RIGHTS control
 * message into file descriptors in the receiving process.
 * `controllen' is the buffer space the receiver provided; `flags'
 * may contain MSG_CMSG_CLOEXEC.  On failure all passed files are
 * discarded (queued for unp_gc()).  Returns 0 or an errno value.
 */
int
unp_externalize(struct mbuf *rights, socklen_t controllen, int flags)
{
	struct proc *p = curproc;		/* XXX */
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	struct filedesc *fdp = p->p_fd;
	int i, *fds = NULL;
	struct fdpass *rp;
	struct file *fp;
	int nfds, error = 0;

	/*
	 * This code only works because SCM_RIGHTS is the only supported
	 * control message type on unix sockets. Enforce this here.
	 */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET)
		return EINVAL;

	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
	    sizeof(struct fdpass);
	if (controllen < CMSG_ALIGN(sizeof(struct cmsghdr)))
		controllen = 0;
	else
		controllen -= CMSG_ALIGN(sizeof(struct cmsghdr));
	/* not enough room in the receiver's buffer: discard everything */
	if (nfds > controllen / sizeof(int)) {
		error = EMSGSIZE;
		goto restart;
	}

	/* Make sure the recipient should be able to see the descriptors.. */
	rp = (struct fdpass *)CMSG_DATA(cm);
	for (i = 0; i < nfds; i++) {
		fp = rp->fp;
		rp++;
		error = pledge_recvfd(p, fp);
		if (error)
			break;

		/*
		 * No to block devices.  If passing a directory,
		 * make sure that it is underneath the root.
		 */
		if (fdp->fd_rdir != NULL && fp->f_type == DTYPE_VNODE) {
			struct vnode *vp = (struct vnode *)fp->f_data;

			if (vp->v_type == VBLK ||
			    (vp->v_type == VDIR &&
			    !vn_isunder(vp, fdp->fd_rdir, p))) {
				error = EPERM;
				break;
			}
		}
	}

	fds = mallocarray(nfds, sizeof(int), M_TEMP, M_WAITOK);

restart:
	fdplock(fdp);
	if (error != 0) {
		/* failure: close all passed files via the deferral list */
		if (nfds > 0) {
			rp = ((struct fdpass *)CMSG_DATA(cm));
			unp_discard(rp, nfds);
		}
		goto out;
	}

	/*
	 * First loop -- allocate file descriptor table slots for the
	 * new descriptors.
	 */
	rp = ((struct fdpass *)CMSG_DATA(cm));
	for (i = 0; i < nfds; i++) {
		if ((error = fdalloc(p, 0, &fds[i])) != 0) {
			/*
			 * Back out what we've done so far.
			 */
			for (--i; i >= 0; i--)
				fdremove(fdp, fds[i]);

			if (error == ENOSPC) {
				/* grow the fd table and retry from the top */
				fdexpand(p);
				error = 0;
			} else {
				/*
				 * This is the error that has historically
				 * been returned, and some callers may
				 * expect it.
				 */
				error = EMSGSIZE;
			}
			fdpunlock(fdp);
			goto restart;
		}

		/*
		 * Make the slot reference the descriptor so that
		 * fdalloc() works properly.. We finalize it all
		 * in the loop below.
		 */
		mtx_enter(&fdp->fd_fplock);
		KASSERT(fdp->fd_ofiles[fds[i]] == NULL);
		fdp->fd_ofiles[fds[i]] = rp->fp;
		mtx_leave(&fdp->fd_fplock);

		fdp->fd_ofileflags[fds[i]] = (rp->flags & UF_PLEDGED);
		if (flags & MSG_CMSG_CLOEXEC)
			fdp->fd_ofileflags[fds[i]] |= UF_EXCLOSE;

		rp++;
	}

	/*
	 * Now that adding them has succeeded, update all of the
	 * descriptor passing state.
	 */
	rp = (struct fdpass *)CMSG_DATA(cm);
	for (i = 0; i < nfds; i++) {
		struct unpcb *unp;

		fp = rp->fp;
		rp++;
		if ((unp = fptounp(fp)) != NULL)
			unp->unp_msgcount--;
		unp_rights--;
	}

	/*
	 * Copy temporary array to message and adjust length, in case of
	 * transition from large struct file pointers to ints.
	 */
	memcpy(CMSG_DATA(cm), fds, nfds * sizeof(int));
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
	rights->m_len = CMSG_LEN(nfds * sizeof(int));
 out:
	fdpunlock(fdp);
	if (fds != NULL)
		free(fds, M_TEMP, nfds * sizeof(int));
	return (error);
}
816 
/*
 * Convert the file descriptor numbers in an outgoing SCM_RIGHTS
 * control message into struct fdpass entries (in-kernel file
 * pointers plus flags), taking a reference on each file.  The
 * conversion happens in place, back to front, growing the mbuf into
 * a cluster when more room is needed.  Returns 0 or an errno value;
 * on failure every reference taken so far is released.
 */
int
unp_internalize(struct mbuf *control, struct proc *p)
{
	struct filedesc *fdp = p->p_fd;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct fdpass *rp;
	struct file *fp;
	struct unpcb *unp;
	int i, error;
	int nfds, *ip, fd, neededspace;

	/*
	 * Check for two potential msg_controllen values because
	 * IETF stuck their nose in a place it does not belong.
	 */
	if (control->m_len < CMSG_LEN(0) || cm->cmsg_len < CMSG_LEN(0))
		return (EINVAL);
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    !(cm->cmsg_len == control->m_len ||
	    control->m_len == CMSG_ALIGN(cm->cmsg_len)))
		return (EINVAL);
	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);

	/* global limit on in-flight descriptors */
	if (unp_rights + nfds > maxfiles / 10)
		return (EMFILE);

	/* Make sure we have room for the struct file pointers */
morespace:
	neededspace = CMSG_SPACE(nfds * sizeof(struct fdpass)) -
	    control->m_len;
	if (neededspace > m_trailingspace(control)) {
		char *tmp;
		/* if we already have a cluster, the message is just too big */
		if (control->m_flags & M_EXT)
			return (E2BIG);

		/* copy cmsg data temporarily out of the mbuf */
		tmp = malloc(control->m_len, M_TEMP, M_WAITOK);
		memcpy(tmp, mtod(control, caddr_t), control->m_len);

		/* allocate a cluster and try again */
		MCLGET(control, M_WAIT);
		if ((control->m_flags & M_EXT) == 0) {
			free(tmp, M_TEMP, control->m_len);
			return (ENOBUFS);       /* allocation failed */
		}

		/* copy the data back into the cluster */
		cm = mtod(control, struct cmsghdr *);
		memcpy(cm, tmp, control->m_len);
		free(tmp, M_TEMP, control->m_len);
		goto morespace;
	}

	/* adjust message & mbuf to note amount of space actually used. */
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct fdpass));
	control->m_len = CMSG_SPACE(nfds * sizeof(struct fdpass));

	/*
	 * Walk back to front so the wider fdpass entries never
	 * overwrite int fds that have not been read yet.
	 */
	ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
	rp = ((struct fdpass *)CMSG_DATA(cm)) + nfds - 1;
	fdplock(fdp);
	for (i = 0; i < nfds; i++) {
		memcpy(&fd, ip, sizeof fd);
		ip--;
		if ((fp = fd_getfile(fdp, fd)) == NULL) {
			error = EBADF;
			goto fail;
		}
		/* refuse files whose reference count could overflow */
		if (fp->f_count >= FDUP_MAX_COUNT) {
			error = EDEADLK;
			goto fail;
		}
		error = pledge_sendfd(p, fp);
		if (error)
			goto fail;

		/* kqueue descriptors cannot be copied */
		if (fp->f_type == DTYPE_KQUEUE) {
			error = EINVAL;
			goto fail;
		}
		rp->fp = fp;
		rp->flags = fdp->fd_ofileflags[fd] & UF_PLEDGED;
		rp--;
		if ((unp = fptounp(fp)) != NULL) {
			/* track in-flight unix sockets for unp_gc() */
			unp->unp_file = fp;
			unp->unp_msgcount++;
		}
		unp_rights++;
	}
	fdpunlock(fdp);
	return (0);
fail:
	fdpunlock(fdp);
	/* drop the reference fd_getfile() took on the failing entry */
	if (fp != NULL)
		FRELE(fp, p);
	/* Back out what we just did. */
	for ( ; i > 0; i--) {
		rp++;
		fp = rp->fp;
		if ((unp = fptounp(fp)) != NULL)
			unp->unp_msgcount--;
		FRELE(fp, p);
		unp_rights--;
	}

	return (error);
}
925 
int	unp_defer, unp_gcing;	/* mark-pass work count; reentrancy guard */

/*
 * Garbage collector for file descriptors "in flight" over Unix-domain
 * sockets.  First closes any files queued on the deferral list, then
 * runs a mark-and-sweep over all unpcbs to find sockets whose only
 * remaining references come from being passed inside messages
 * (reference cycles) and discards their in-flight contents.  Runs
 * from the system task queue; `arg' is unused.
 */
void
unp_gc(void *arg __unused)
{
	struct unp_deferral *defer;
	struct file *fp;
	struct socket *so;
	struct unpcb *unp;
	int nunref, i;

	/* not reentrant */
	if (unp_gcing)
		return;
	unp_gcing = 1;

	/* close any fds on the deferred list */
	while ((defer = SLIST_FIRST(&unp_deferred)) != NULL) {
		SLIST_REMOVE_HEAD(&unp_deferred, ud_link);
		for (i = 0; i < defer->ud_n; i++) {
			fp = defer->ud_fp[i].fp;
			if (fp == NULL)
				continue;
			 /* closef() expects a refcount of 2 */
			FREF(fp);
			if ((unp = fptounp(fp)) != NULL)
				unp->unp_msgcount--;
			unp_rights--;
			(void) closef(fp, NULL);
		}
		free(defer, M_TEMP, sizeof(*defer) +
		    sizeof(struct fdpass) * defer->ud_n);
	}

	/* clear all GC state before starting the mark passes */
	unp_defer = 0;
	LIST_FOREACH(unp, &unp_head, unp_link)
		unp->unp_flags &= ~(UNP_GCMARK | UNP_GCDEFER | UNP_GCDEAD);
	do {
		nunref = 0;
		LIST_FOREACH(unp, &unp_head, unp_link) {
			fp = unp->unp_file;
			if (unp->unp_flags & UNP_GCDEFER) {
				/*
				 * This socket is referenced by another
				 * socket which is known to be live,
				 * so it's certainly live.
				 */
				unp->unp_flags &= ~UNP_GCDEFER;
				unp_defer--;
			} else if (unp->unp_flags & UNP_GCMARK) {
				/* marked as live in previous pass */
				continue;
			} else if (fp == NULL) {
				/* not being passed, so can't be in loop */
			} else if (fp->f_count == 0) {
				/*
				 * Already being closed, let normal close
				 * path take its course
				 */
			} else {
				/*
				 * Unreferenced by other sockets so far,
				 * so if all the references (f_count) are
				 * from passing (unp_msgcount) then this
				 * socket is prospectively dead
				 */
				if (fp->f_count == unp->unp_msgcount) {
					nunref++;
					unp->unp_flags |= UNP_GCDEAD;
					continue;
				}
			}

			/*
			 * This is the first time we've seen this socket on
			 * the mark pass and known it has a live reference,
			 * so mark it, then scan its receive buffer for
			 * sockets and note them as deferred (== referenced,
			 * but not yet marked).
			 */
			unp->unp_flags |= UNP_GCMARK;

			so = unp->unp_socket;
			unp_scan(so->so_rcv.sb_mb, unp_mark);
		}
	} while (unp_defer);

	/*
	 * If there are any unreferenced sockets, then for each dispose
	 * of files in its receive buffer and then close it.
	 */
	if (nunref) {
		LIST_FOREACH(unp, &unp_head, unp_link) {
			if (unp->unp_flags & UNP_GCDEAD)
				unp_scan(unp->unp_socket->so_rcv.sb_mb,
				    unp_discard);
		}
	}
	unp_gcing = 0;
}
1025 
1026 void
1027 unp_dispose(struct mbuf *m)
1028 {
1029 
1030 	if (m)
1031 		unp_scan(m, unp_discard);
1032 }
1033 
1034 void
1035 unp_scan(struct mbuf *m0, void (*op)(struct fdpass *, int))
1036 {
1037 	struct mbuf *m;
1038 	struct fdpass *rp;
1039 	struct cmsghdr *cm;
1040 	int qfds;
1041 
1042 	while (m0) {
1043 		for (m = m0; m; m = m->m_next) {
1044 			if (m->m_type == MT_CONTROL &&
1045 			    m->m_len >= sizeof(*cm)) {
1046 				cm = mtod(m, struct cmsghdr *);
1047 				if (cm->cmsg_level != SOL_SOCKET ||
1048 				    cm->cmsg_type != SCM_RIGHTS)
1049 					continue;
1050 				qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm))
1051 				    / sizeof(struct fdpass);
1052 				if (qfds > 0) {
1053 					rp = (struct fdpass *)CMSG_DATA(cm);
1054 					op(rp, qfds);
1055 				}
1056 				break;		/* XXX, but saves time */
1057 			}
1058 		}
1059 		m0 = m0->m_nextpkt;
1060 	}
1061 }
1062 
1063 void
1064 unp_mark(struct fdpass *rp, int nfds)
1065 {
1066 	struct unpcb *unp;
1067 	int i;
1068 
1069 	for (i = 0; i < nfds; i++) {
1070 		if (rp[i].fp == NULL)
1071 			continue;
1072 
1073 		unp = fptounp(rp[i].fp);
1074 		if (unp == NULL)
1075 			continue;
1076 
1077 		if (unp->unp_flags & (UNP_GCMARK|UNP_GCDEFER))
1078 			continue;
1079 
1080 		unp_defer++;
1081 		unp->unp_flags |= UNP_GCDEFER;
1082 		unp->unp_flags &= ~UNP_GCDEAD;
1083 	}
1084 }
1085 
1086 void
1087 unp_discard(struct fdpass *rp, int nfds)
1088 {
1089 	struct unp_deferral *defer;
1090 
1091 	/* copy the file pointers to a deferral structure */
1092 	defer = malloc(sizeof(*defer) + sizeof(*rp) * nfds, M_TEMP, M_WAITOK);
1093 	defer->ud_n = nfds;
1094 	memcpy(&defer->ud_fp[0], rp, sizeof(*rp) * nfds);
1095 	memset(rp, 0, sizeof(*rp) * nfds);
1096 	SLIST_INSERT_HEAD(&unp_deferred, defer, ud_link);
1097 
1098 	task_add(systq, &unp_gc_task);
1099 }
1100 
/*
 * Validate the sockaddr carried in `nam' and return it as a
 * sockaddr_un via `sun'.  On success sun_path is guaranteed to be
 * NUL-terminated (the mbuf is extended by one byte if necessary) and,
 * when `pathlen' is not NULL, the path length is stored there.
 * Returns 0 or an errno value.
 */
int
unp_nam2sun(struct mbuf *nam, struct sockaddr_un **sun, size_t *pathlen)
{
	struct sockaddr *sa = mtod(nam, struct sockaddr *);
	size_t size, len;

	if (nam->m_len < offsetof(struct sockaddr, sa_data))
		return EINVAL;
	if (sa->sa_family != AF_UNIX)
		return EAFNOSUPPORT;
	/* the mbuf and the address must agree about the length */
	if (sa->sa_len != nam->m_len)
		return EINVAL;
	if (sa->sa_len > sizeof(struct sockaddr_un))
		return EINVAL;
	*sun = (struct sockaddr_un *)sa;

	/* ensure that sun_path is NUL terminated and fits */
	size = (*sun)->sun_len - offsetof(struct sockaddr_un, sun_path);
	len = strnlen((*sun)->sun_path, size);
	if (len == sizeof((*sun)->sun_path))
		return EINVAL;
	if (len == size) {
		/* no NUL in the supplied path: append one ourselves */
		if (m_trailingspace(nam) == 0)
			return EINVAL;
		nam->m_len++;
		(*sun)->sun_len++;
		(*sun)->sun_path[len] = '\0';
	}
	if (pathlen != NULL)
		*pathlen = len;

	return 0;
}
1134