/*	$OpenBSD: uipc_usrreq.c,v 1.203 2024/03/26 09:46:47 mvs Exp $	*/
/*	$NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/filedesc.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/unpcb.h>
#include <sys/un.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/mbuf.h>
#include <sys/task.h>
#include <sys/pledge.h>
#include <sys/pool.h>
#include <sys/rwlock.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/refcnt.h>

#include "kcov.h"
#if NKCOV > 0
#include <sys/kcov.h>
#endif

/*
 * Locks used to protect global data and struct members:
 *      I       immutable after creation
 *      D       unp_df_lock
 *      G       unp_gc_lock
 *      M       unp_ino_mtx
 *      R       unp_rights_mtx
 *      a       atomic
 *      s       socket lock
 */

struct rwlock unp_df_lock = RWLOCK_INITIALIZER("unpdflk");
struct rwlock unp_gc_lock = RWLOCK_INITIALIZER("unpgclk");

struct mutex unp_rights_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
struct mutex unp_ino_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);

/*
 * Stack of sets of files that were passed over a socket but were
 * not received and need to be closed.
 */
struct	unp_deferral {
	SLIST_ENTRY(unp_deferral)	ud_link;	/* [D] */
	int				ud_n;		/* [I] */
	/* followed by ud_n struct fdpass */
	struct fdpass			ud_fp[];	/* [I] */
};

void	uipc_setaddr(const struct unpcb *, struct mbuf *);
void	unp_discard(struct fdpass *, int);
void	unp_remove_gcrefs(struct fdpass *, int);
void	unp_restore_gcrefs(struct fdpass *, int);
void	unp_scan(struct mbuf *, void (*)(struct fdpass *, int));
int	unp_nam2sun(struct mbuf *, struct sockaddr_un **, size_t *);
static inline void unp_ref(struct unpcb *);
static inline void unp_rele(struct unpcb *);
struct socket *unp_solock_peer(struct socket *);

struct pool unpcb_pool;
struct task unp_gc_task = TASK_INITIALIZER(unp_gc, NULL);

/*
 * Unix communications domain.
 *
 * TODO:
 *	RDM
 *	rethink name space problems
 *	need a proper out-of-band
 */
const struct	sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };

/* [G] list of all UNIX domain sockets, for unp_gc() */
LIST_HEAD(unp_head, unpcb)	unp_head =
	LIST_HEAD_INITIALIZER(unp_head);
/* [D] list of sets of files that were sent over sockets that are now closed */
SLIST_HEAD(,unp_deferral)	unp_deferred =
	SLIST_HEAD_INITIALIZER(unp_deferred);

ino_t	unp_ino;	/* [M] prototype for fake inode numbers */
int	unp_rights;	/* [R] file descriptors in flight */
int	unp_defer;	/* [G] number of deferred fp to close by the GC task */
int	unp_gcing;	/* [G] GC task currently running */

const struct pr_usrreqs uipc_usrreqs = {
	.pru_attach	= uipc_attach,
	.pru_detach	= uipc_detach,
	.pru_bind	= uipc_bind,
	.pru_listen	= uipc_listen,
	.pru_connect	= uipc_connect,
	.pru_accept	= uipc_accept,
	.pru_disconnect	= uipc_disconnect,
	.pru_shutdown	= uipc_shutdown,
	.pru_rcvd	= uipc_rcvd,
	.pru_send	= uipc_send,
	.pru_abort	= uipc_abort,
	.pru_sense	= uipc_sense,
	.pru_sockaddr	= uipc_sockaddr,
	.pru_peeraddr	= uipc_peeraddr,
	.pru_connect2	= uipc_connect2,
};

const struct pr_usrreqs uipc_dgram_usrreqs = {
	.pru_attach	= uipc_attach,
	.pru_detach	= uipc_detach,
	.pru_bind	= uipc_bind,
	.pru_listen	= uipc_listen,
	.pru_connect	= uipc_connect,
	.pru_disconnect	= uipc_disconnect,
	.pru_shutdown	= uipc_dgram_shutdown,
	.pru_send	= uipc_dgram_send,
	.pru_sense	= uipc_sense,
	.pru_sockaddr	= uipc_sockaddr,
	.pru_peeraddr	= uipc_peeraddr,
	.pru_connect2	= uipc_connect2,
};

void
unp_init(void)
{
	pool_init(&unpcb_pool, sizeof(struct unpcb), 0,
	    IPL_SOFTNET, 0, "unpcb", NULL);
}

static inline void
unp_ref(struct unpcb *unp)
{
	refcnt_take(&unp->unp_refcnt);
}

static inline void
unp_rele(struct unpcb *unp)
{
	refcnt_rele_wake(&unp->unp_refcnt);
}

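/*
 * Lock the peer of an already locked socket.  Lock order is derived
 * from the socket addresses.  If the peer sorts after `so', `so' must
 * be unlocked first; a reference on the peer PCB is held across the
 * re-lock and the connection is then checked again, since it may have
 * changed while no lock was held.
 */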
struct socket *
unp_solock_peer(struct socket *so)
{
	struct unpcb *unp, *unp2;
	struct socket *so2;

	unp = so->so_pcb;

again:
	if ((unp2 = unp->unp_conn) == NULL)
		return NULL;

	so2 = unp2->unp_socket;

	if (so < so2)
		solock(so2);
	else if (so > so2) {
		unp_ref(unp2);
		sounlock(so);
		solock(so2);
		solock(so);

		/* Datagram socket could be reconnected due to re-lock. */
		if (unp->unp_conn != unp2) {
			sounlock(so2);
			unp_rele(unp2);
			goto again;
		}

		unp_rele(unp2);
	}

	return so2;
}

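/*
 * Copy the address the socket is bound to, or sun_noname if it is
 * unbound, into the caller supplied mbuf `nam'.
 */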
void
uipc_setaddr(const struct unpcb *unp, struct mbuf *nam)
{
	if (unp != NULL && unp->unp_addr != NULL) {
		nam->m_len = unp->unp_addr->m_len;
		memcpy(mtod(nam, caddr_t), mtod(unp->unp_addr, caddr_t),
		    nam->m_len);
	} else {
		nam->m_len = sizeof(sun_noname);
		memcpy(mtod(nam, struct sockaddr *), &sun_noname,
		    nam->m_len);
	}
}

/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 */
#define	PIPSIZ	8192
u_int	unpst_sendspace = PIPSIZ;
u_int	unpst_recvspace = PIPSIZ;
u_int	unpsq_sendspace = PIPSIZ;
u_int	unpsq_recvspace = PIPSIZ;
u_int	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_int	unpdg_recvspace = 16*1024;

const struct sysctl_bounded_args unpstctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpst_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpst_sendspace, 0, SB_MAX },
};
const struct sysctl_bounded_args unpsqctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpsq_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpsq_sendspace, 0, SB_MAX },
};
const struct sysctl_bounded_args unpdgctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpdg_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpdg_sendspace, 0, SB_MAX },
};

int
uipc_attach(struct socket *so, int proto, int wait)
{
	struct unpcb *unp;
	int error;

	if (so->so_pcb)
		return EISCONN;
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {

		case SOCK_STREAM:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_SEQPACKET:
			error = soreserve(so, unpsq_sendspace, unpsq_recvspace);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			return (error);
	}
	unp = pool_get(&unpcb_pool, (wait == M_WAIT ? PR_WAITOK : PR_NOWAIT) |
	    PR_ZERO);
	if (unp == NULL)
		return (ENOBUFS);
	refcnt_init(&unp->unp_refcnt);
	unp->unp_socket = so;
	so->so_pcb = unp;
	getnanotime(&unp->unp_ctime);

	/*
	 * Enforce `unp_gc_lock' -> `solock()' lock order.
	 */
	sounlock(so);
	rw_enter_write(&unp_gc_lock);
	LIST_INSERT_HEAD(&unp_head, unp, unp_link);
	rw_exit_write(&unp_gc_lock);
	solock(so);
	return (0);
}

int
uipc_detach(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == NULL)
		return (EINVAL);

	unp_detach(unp);

	return (0);
}

int
uipc_bind(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct unpcb *unp = sotounpcb(so);
	struct sockaddr_un *soun;
	struct mbuf *nam2;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	size_t pathlen;

	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EINVAL);
	if (unp->unp_vnode != NULL)
		return (EINVAL);
	if ((error = unp_nam2sun(nam, &soun, &pathlen)))
		return (error);

	unp->unp_flags |= UNP_BINDING;

	/*
	 * Enforce `i_lock' -> `solock' because fifo subsystem
	 * requires it. The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(unp->unp_socket);

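	/*
	 * Build a private, NUL terminated copy of the name in a
	 * fresh mbuf; on success it is kept as `unp_addr'.
	 */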
	nam2 = m_getclr(M_WAITOK, MT_SONAME);
	nam2->m_len = sizeof(struct sockaddr_un);
	memcpy(mtod(nam2, struct sockaddr_un *), soun,
	    offsetof(struct sockaddr_un, sun_path) + pathlen);
	/* No need to NUL terminate: m_getclr() returns zero'd mbufs. */

	soun = mtod(nam2, struct sockaddr_un *);

	/* Fixup sun_len to keep it in sync with m_len. */
	soun->sun_len = nam2->m_len;

	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;
	nd.ni_unveil = UNVEIL_CREATE;

	KERNEL_LOCK();
/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error != 0) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	vp = nd.ni_vp;
	if (vp != NULL) {
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		m_freem(nam2);
		error = EADDRINUSE;
		solock(unp->unp_socket);
		goto out;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	vput(nd.ni_dvp);
	if (error) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	solock(unp->unp_socket);
	unp->unp_addr = nam2;
	vp = nd.ni_vp;
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_connid.uid = p->p_ucred->cr_uid;
	unp->unp_connid.gid = p->p_ucred->cr_gid;
	unp->unp_connid.pid = p->p_p->ps_pid;
	unp->unp_flags |= UNP_FEIDSBIND;
	VOP_UNLOCK(vp);
out:
	KERNEL_UNLOCK();
	unp->unp_flags &= ~UNP_BINDING;

	return (error);
}

int
uipc_listen(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EINVAL);
	if (unp->unp_vnode == NULL)
		return (EINVAL);
	return (0);
}

int
uipc_connect(struct socket *so, struct mbuf *nam)
{
	return unp_connect(so, nam, curproc);
}

int
uipc_accept(struct socket *so, struct mbuf *nam)
{
	struct socket *so2;
	struct unpcb *unp = sotounpcb(so);

	/*
	 * Pass back name of connected socket, if it was bound and
	 * we are still connected (our peer may have closed already!).
	 */
	so2 = unp_solock_peer(so);
	uipc_setaddr(unp->unp_conn, nam);

	if (so2 != NULL && so2 != so)
		sounlock(so2);
	return (0);
}

int
uipc_disconnect(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	unp_disconnect(unp);
	return (0);
}

int
uipc_shutdown(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	socantsendmore(so);

	if ((so2 = unp_solock_peer(unp->unp_socket))) {
		socantrcvmore(so2);
		sounlock(so2);
	}

	return (0);
}

int
uipc_dgram_shutdown(struct socket *so)
{
	socantsendmore(so);
	return (0);
}

void
uipc_rcvd(struct socket *so)
{
	struct socket *so2;

	if ((so2 = unp_solock_peer(so)) == NULL)
		return;
	/*
	 * Adjust backpressure on sender
	 * and wakeup any waiting to write.
	 */
	mtx_enter(&so->so_rcv.sb_mtx);
	so2->so_snd.sb_mbcnt = so->so_rcv.sb_mbcnt;
	so2->so_snd.sb_cc = so->so_rcv.sb_cc;
	mtx_leave(&so->so_rcv.sb_mtx);
	sowwakeup(so2);
	sounlock(so2);
}

int
uipc_send(struct socket *so, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	int error = 0, dowakeup = 0;

	if (control) {
		sounlock(so);
		error = unp_internalize(control, curproc);
		solock(so);
		if (error)
			goto out;
	}

	if (so->so_snd.sb_state & SS_CANTSENDMORE) {
		error = EPIPE;
		goto dispose;
	}
	if (unp->unp_conn == NULL) {
		error = ENOTCONN;
		goto dispose;
	}

	so2 = unp->unp_conn->unp_socket;

	/*
	 * Send to paired receive port, and then raise
	 * send buffer counts to maintain backpressure.
	 * Wake up readers.
	 */
	mtx_enter(&so2->so_rcv.sb_mtx);
	if (control) {
		if (sbappendcontrol(so2, &so2->so_rcv, m, control)) {
			control = NULL;
		} else {
			mtx_leave(&so2->so_rcv.sb_mtx);
			error = ENOBUFS;
			goto dispose;
		}
	} else if (so->so_type == SOCK_SEQPACKET)
		sbappendrecord(so2, &so2->so_rcv, m);
	else
		sbappend(so2, &so2->so_rcv, m);
	so->so_snd.sb_mbcnt = so2->so_rcv.sb_mbcnt;
	so->so_snd.sb_cc = so2->so_rcv.sb_cc;
	if (so2->so_rcv.sb_cc > 0)
		dowakeup = 1;
	mtx_leave(&so2->so_rcv.sb_mtx);

	if (dowakeup)
		sorwakeup(so2);

	m = NULL;

dispose:
	/* we need to undo unp_internalize in case of errors */
	if (control && error)
		unp_dispose(control);

out:
	m_freem(control);
	m_freem(m);

	return (error);
}

int
uipc_dgram_send(struct socket *so, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	const struct sockaddr *from;
	int error = 0, dowakeup = 0;

	if (control) {
		sounlock(so);
		error = unp_internalize(control, curproc);
		solock(so);
		if (error)
			goto out;
	}

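	/*
	 * An address supplied with the message implies a temporary
	 * connection for the duration of the send; it is undone
	 * once the message has been appended.
	 */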
	if (nam) {
		if (unp->unp_conn) {
			error = EISCONN;
			goto dispose;
		}
		error = unp_connect(so, nam, curproc);
		if (error)
			goto dispose;
	}

	if (unp->unp_conn == NULL) {
		if (nam != NULL)
			error = ECONNREFUSED;
		else
			error = ENOTCONN;
		goto dispose;
	}

	so2 = unp->unp_conn->unp_socket;

	if (unp->unp_addr)
		from = mtod(unp->unp_addr, struct sockaddr *);
	else
		from = &sun_noname;

	mtx_enter(&so2->so_rcv.sb_mtx);
	if (sbappendaddr(so2, &so2->so_rcv, from, m, control)) {
		dowakeup = 1;
		m = NULL;
		control = NULL;
	} else
		error = ENOBUFS;
	mtx_leave(&so2->so_rcv.sb_mtx);

	if (dowakeup)
		sorwakeup(so2);
	if (nam)
		unp_disconnect(unp);

dispose:
	/* we need to undo unp_internalize in case of errors */
	if (control && error)
		unp_dispose(control);

out:
	m_freem(control);
	m_freem(m);

	return (error);
}

void
uipc_abort(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	unp_detach(unp);
	sofree(so, 0);
}

int
uipc_sense(struct socket *so, struct stat *sb)
{
	struct unpcb *unp = sotounpcb(so);

	sb->st_blksize = so->so_snd.sb_hiwat;
	sb->st_dev = NODEV;
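	/* Lazily assign a fake inode number to this socket. */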
	mtx_enter(&unp_ino_mtx);
	if (unp->unp_ino == 0)
		unp->unp_ino = unp_ino++;
	mtx_leave(&unp_ino_mtx);
	sb->st_atim.tv_sec =
	    sb->st_mtim.tv_sec =
	    sb->st_ctim.tv_sec = unp->unp_ctime.tv_sec;
	sb->st_atim.tv_nsec =
	    sb->st_mtim.tv_nsec =
	    sb->st_ctim.tv_nsec = unp->unp_ctime.tv_nsec;
	sb->st_ino = unp->unp_ino;

	return (0);
}

int
uipc_sockaddr(struct socket *so, struct mbuf *nam)
{
	struct unpcb *unp = sotounpcb(so);

	uipc_setaddr(unp, nam);
	return (0);
}

int
uipc_peeraddr(struct socket *so, struct mbuf *nam)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	so2 = unp_solock_peer(so);
	uipc_setaddr(unp->unp_conn, nam);
	if (so2 != NULL && so2 != so)
		sounlock(so2);
	return (0);
}

int
uipc_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so), *unp2;
	int error;

	if ((error = unp_connect2(so, so2)))
		return (error);

	unp->unp_connid.uid = curproc->p_ucred->cr_uid;
	unp->unp_connid.gid = curproc->p_ucred->cr_gid;
	unp->unp_connid.pid = curproc->p_p->ps_pid;
	unp->unp_flags |= UNP_FEIDS;
	unp2 = sotounpcb(so2);
	unp2->unp_connid.uid = curproc->p_ucred->cr_uid;
	unp2->unp_connid.gid = curproc->p_ucred->cr_gid;
	unp2->unp_connid.pid = curproc->p_p->ps_pid;
	unp2->unp_flags |= UNP_FEIDS;

	return (0);
}

int
uipc_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int *valp = &unp_defer;

	/* All sysctl names at this level are terminal. */
	switch (name[0]) {
	case SOCK_STREAM:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpstctl_vars, nitems(unpstctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case SOCK_SEQPACKET:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpsqctl_vars, nitems(unpsqctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case SOCK_DGRAM:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpdgctl_vars, nitems(unpdgctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case NET_UNIX_INFLIGHT:
		valp = &unp_rights;
		/* FALLTHROUGH */
	case NET_UNIX_DEFERRED:
		if (namelen != 1)
			return (ENOTDIR);
		return sysctl_rdint(oldp, oldlenp, newp, *valp);
	default:
		return (ENOPROTOOPT);
	}
}

void
unp_detach(struct unpcb *unp)
{
	struct socket *so = unp->unp_socket;
	struct vnode *vp = unp->unp_vnode;
	struct unpcb *unp2;

	unp->unp_vnode = NULL;

	/*
	 * Enforce `unp_gc_lock' -> `solock()' lock order.
	 * Enforce `i_lock' -> `solock()' lock order.
	 */
	sounlock(so);

	rw_enter_write(&unp_gc_lock);
	LIST_REMOVE(unp, unp_link);
	rw_exit_write(&unp_gc_lock);

	if (vp != NULL) {
		VOP_LOCK(vp, LK_EXCLUSIVE);
		vp->v_socket = NULL;

		KERNEL_LOCK();
		vput(vp);
		KERNEL_UNLOCK();
	}

	solock(so);

	if (unp->unp_conn != NULL) {
		/*
		 * Datagram socket could be connected to itself.
		 * Such socket will be disconnected here.
		 */
		unp_disconnect(unp);
	}

	while ((unp2 = SLIST_FIRST(&unp->unp_refs)) != NULL) {
		struct socket *so2 = unp2->unp_socket;

		if (so < so2)
			solock(so2);
		else {
			unp_ref(unp2);
			sounlock(so);
			solock(so2);
			solock(so);

			if (unp2->unp_conn != unp) {
				/* `unp2' was disconnected due to re-lock. */
				sounlock(so2);
				unp_rele(unp2);
				continue;
			}

			unp_rele(unp2);
		}

		unp2->unp_conn = NULL;
		SLIST_REMOVE(&unp->unp_refs, unp2, unpcb, unp_nextref);
		so2->so_error = ECONNRESET;
		so2->so_state &= ~SS_ISCONNECTED;

		sounlock(so2);
	}

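	/*
	 * Unlock the socket while draining references: a concurrent
	 * unp_solock_peer() holding a reference may itself need to
	 * lock `so' before it can release that reference.
	 */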
	sounlock(so);
	refcnt_finalize(&unp->unp_refcnt, "unpfinal");
	solock(so);

	soisdisconnected(so);
	so->so_pcb = NULL;
	m_freem(unp->unp_addr);
	pool_put(&unpcb_pool, unp);
	if (unp_rights)
		task_add(systqmp, &unp_gc_task);
}

int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	struct nameidata nd;
	int error;

	unp = sotounpcb(so);
	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EISCONN);
	if ((error = unp_nam2sun(nam, &soun, NULL)))
		return (error);

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;
	nd.ni_unveil = UNVEIL_WRITE;

	unp->unp_flags |= UNP_CONNECTING;

	/*
	 * Enforce `i_lock' -> `solock' because fifo subsystem
	 * requires it. The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(so);

	KERNEL_LOCK();
	error = namei(&nd);
	if (error != 0)
		goto unlock;
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto put;
	}
	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
		goto put;
	so2 = vp->v_socket;
	if (so2 == NULL) {
		error = ECONNREFUSED;
		goto put;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto put;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		solock(so2);

		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0, M_WAIT)) == NULL) {
			error = ECONNREFUSED;
		}

		sounlock(so2);

		if (error != 0)
			goto put;

		/*
		 * Since `so2' is protected by vnode(9) lock, `so3'
		 * can't be PRU_ABORT'ed here.
		 */
		solock_pair(so, so3);

		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);

		/*
		 * `unp_addr', `unp_connid' and 'UNP_FEIDSBIND' flag
		 * are immutable since we set them in uipc_bind().
		 */
		if (unp2->unp_addr)
			unp3->unp_addr =
			    m_copym(unp2->unp_addr, 0, M_COPYALL, M_NOWAIT);
		unp3->unp_connid.uid = p->p_ucred->cr_uid;
		unp3->unp_connid.gid = p->p_ucred->cr_gid;
		unp3->unp_connid.pid = p->p_p->ps_pid;
		unp3->unp_flags |= UNP_FEIDS;

		if (unp2->unp_flags & UNP_FEIDSBIND) {
			unp->unp_connid = unp2->unp_connid;
			unp->unp_flags |= UNP_FEIDS;
		}

		so2 = so3;
	} else {
		if (so2 != so)
			solock_pair(so, so2);
		else
			solock(so);
	}

	error = unp_connect2(so, so2);

	sounlock(so);

	/*
	 * `so2' can't be PRU_ABORT'ed concurrently
	 */
	if (so2 != so)
		sounlock(so2);
put:
	vput(vp);
unlock:
	KERNEL_UNLOCK();
	solock(so);
	unp->unp_flags &= ~UNP_CONNECTING;

	/*
	 * The peer socket could be closed by concurrent thread
	 * when `so' and `vp' are unlocked.
	 */
	if (error == 0 && unp->unp_conn == NULL)
		error = ECONNREFUSED;

	return (error);
}

int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;

	soassertlocked(so);
	soassertlocked(so2);

	if (so2->so_type != so->so_type)
		return (EPROTOTYPE);
	unp2 = sotounpcb(so2);
	unp->unp_conn = unp2;
	switch (so->so_type) {

	case SOCK_DGRAM:
		SLIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_nextref);
		soisconnected(so);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp2->unp_conn = unp;
		soisconnected(so);
		soisconnected(so2);
		break;

	default:
		panic("unp_connect2");
	}
	return (0);
}

void
unp_disconnect(struct unpcb *unp)
{
	struct socket *so2;
	struct unpcb *unp2;

	if ((so2 = unp_solock_peer(unp->unp_socket)) == NULL)
		return;

	unp2 = unp->unp_conn;
	unp->unp_conn = NULL;

	switch (unp->unp_socket->so_type) {

	case SOCK_DGRAM:
		SLIST_REMOVE(&unp2->unp_refs, unp, unpcb, unp_nextref);
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp->unp_socket->so_snd.sb_mbcnt = 0;
		unp->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp->unp_socket);
		unp2->unp_conn = NULL;
		unp2->unp_socket->so_snd.sb_mbcnt = 0;
		unp2->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp2->unp_socket);
		break;
	}

	if (so2 != unp->unp_socket)
		sounlock(so2);
}

static struct unpcb *
fptounp(struct file *fp)
{
	struct socket *so;

	if (fp->f_type != DTYPE_SOCKET)
		return (NULL);
	if ((so = fp->f_data) == NULL)
		return (NULL);
	if (so->so_proto->pr_domain != &unixdomain)
		return (NULL);
	return (sotounpcb(so));
}

int
unp_externalize(struct mbuf *rights, socklen_t controllen, int flags)
{
	struct proc *p = curproc;		/* XXX */
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	struct filedesc *fdp = p->p_fd;
	int i, *fds = NULL;
	struct fdpass *rp;
	struct file *fp;
	int nfds, error = 0;

	/*
	 * This code only works because SCM_RIGHTS is the only supported
	 * control message type on unix sockets. Enforce this here.
	 */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET)
		return EINVAL;

	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
	    sizeof(struct fdpass);
	if (controllen < CMSG_ALIGN(sizeof(struct cmsghdr)))
		controllen = 0;
	else
		controllen -= CMSG_ALIGN(sizeof(struct cmsghdr));
	if (nfds > controllen / sizeof(int)) {
		error = EMSGSIZE;
		goto out;
	}

	/* Make sure the recipient is allowed to see the descriptors. */
	rp = (struct fdpass *)CMSG_DATA(cm);

	/* fdp->fd_rdir requires KERNEL_LOCK() */
	KERNEL_LOCK();

	for (i = 0; i < nfds; i++) {
		fp = rp->fp;
		rp++;
		error = pledge_recvfd(p, fp);
		if (error)
			break;

		/*
		 * Block devices may not be passed.  If passing
		 * a directory, make sure that it is underneath
		 * the recipient's root.
		 */
		if (fdp->fd_rdir != NULL && fp->f_type == DTYPE_VNODE) {
			struct vnode *vp = (struct vnode *)fp->f_data;

			if (vp->v_type == VBLK ||
			    (vp->v_type == VDIR &&
			    !vn_isunder(vp, fdp->fd_rdir, p))) {
				error = EPERM;
				break;
			}
		}
	}

	KERNEL_UNLOCK();

	if (error)
		goto out;

	fds = mallocarray(nfds, sizeof(int), M_TEMP, M_WAITOK);

	fdplock(fdp);
restart:
	/*
	 * First loop -- allocate file descriptor table slots for the
	 * new descriptors.
	 */
	rp = ((struct fdpass *)CMSG_DATA(cm));
	for (i = 0; i < nfds; i++) {
		if ((error = fdalloc(p, 0, &fds[i])) != 0) {
			/*
			 * Back out what we've done so far.
			 */
			for (--i; i >= 0; i--)
				fdremove(fdp, fds[i]);

			if (error == ENOSPC) {
				fdexpand(p);
				goto restart;
			}

			fdpunlock(fdp);

			/*
			 * This is the error that has historically
			 * been returned, and some callers may
			 * expect it.
			 */

			error = EMSGSIZE;
			goto out;
		}

		/*
		 * Make the slot reference the descriptor so that
		 * fdalloc() works properly.  We finalize it all
		 * in the loop below.
		 */
		mtx_enter(&fdp->fd_fplock);
		KASSERT(fdp->fd_ofiles[fds[i]] == NULL);
		fdp->fd_ofiles[fds[i]] = rp->fp;
		mtx_leave(&fdp->fd_fplock);

		fdp->fd_ofileflags[fds[i]] = (rp->flags & UF_PLEDGED);
		if (flags & MSG_CMSG_CLOEXEC)
			fdp->fd_ofileflags[fds[i]] |= UF_EXCLOSE;

		rp++;
	}

	/*
	 * Keep `fdp' locked to prevent concurrent close() of just
	 * inserted descriptors. Such descriptors could have the only
	 * `f_count' reference which is now shared between control
	 * message and `fdp'.
	 */

	/*
	 * Now that adding them has succeeded, update all of the
	 * descriptor passing state.
	 */
	rp = (struct fdpass *)CMSG_DATA(cm);

	for (i = 0; i < nfds; i++) {
		struct unpcb *unp;

		fp = rp->fp;
		rp++;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount--;
			rw_exit_write(&unp_gc_lock);
		}
	}
	fdpunlock(fdp);

	mtx_enter(&unp_rights_mtx);
	unp_rights -= nfds;
	mtx_leave(&unp_rights_mtx);

	/*
	 * Copy temporary array to message and adjust length, in case of
	 * transition from large struct file pointers to ints.
	 */
	memcpy(CMSG_DATA(cm), fds, nfds * sizeof(int));
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
	rights->m_len = CMSG_LEN(nfds * sizeof(int));
 out:
	if (fds != NULL)
		free(fds, M_TEMP, nfds * sizeof(int));

	if (error) {
		if (nfds > 0) {
			/*
			 * No lock required. We are the only `cm' holder.
			 */
			rp = ((struct fdpass *)CMSG_DATA(cm));
			unp_discard(rp, nfds);
		}
	}

	return (error);
}

int
unp_internalize(struct mbuf *control, struct proc *p)
{
	struct filedesc *fdp = p->p_fd;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct fdpass *rp;
	struct file *fp;
	struct unpcb *unp;
	int i, error;
	int nfds, *ip, fd, neededspace;

	/*
	 * Check for two potential msg_controllen values because
	 * IETF stuck their nose in a place it does not belong.
	 */
	if (control->m_len < CMSG_LEN(0) || cm->cmsg_len < CMSG_LEN(0))
		return (EINVAL);
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    !(cm->cmsg_len == control->m_len ||
	    control->m_len == CMSG_ALIGN(cm->cmsg_len)))
		return (EINVAL);
	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);

	mtx_enter(&unp_rights_mtx);
	if (unp_rights + nfds > maxfiles / 10) {
		mtx_leave(&unp_rights_mtx);
		return (EMFILE);
	}
	unp_rights += nfds;
	mtx_leave(&unp_rights_mtx);

	/* Make sure we have room for the struct file pointers */
morespace:
	neededspace = CMSG_SPACE(nfds * sizeof(struct fdpass)) -
	    control->m_len;
	if (neededspace > m_trailingspace(control)) {
		char *tmp;
		/* if we already have a cluster, the message is just too big */
		if (control->m_flags & M_EXT) {
			error = E2BIG;
			goto nospace;
		}

		/* copy cmsg data temporarily out of the mbuf */
		tmp = malloc(control->m_len, M_TEMP, M_WAITOK);
		memcpy(tmp, mtod(control, caddr_t), control->m_len);

		/* allocate a cluster and try again */
		MCLGET(control, M_WAIT);
		if ((control->m_flags & M_EXT) == 0) {
			free(tmp, M_TEMP, control->m_len);
			error = ENOBUFS;	/* allocation failed */
			goto nospace;
		}

		/* copy the data back into the cluster */
		cm = mtod(control, struct cmsghdr *);
		memcpy(cm, tmp, control->m_len);
		free(tmp, M_TEMP, control->m_len);
		goto morespace;
	}

	/* adjust message & mbuf to note amount of space actually used. */
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct fdpass));
	control->m_len = CMSG_SPACE(nfds * sizeof(struct fdpass));

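	/*
	 * Expand the array of ints into an array of the larger struct
	 * fdpass in place.  Both arrays share the same base address,
	 * so walk them from the last element towards the first to
	 * ensure no descriptor number is overwritten before it has
	 * been read.
	 */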
	ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
	rp = ((struct fdpass *)CMSG_DATA(cm)) + nfds - 1;
	fdplock(fdp);
	for (i = 0; i < nfds; i++) {
		memcpy(&fd, ip, sizeof fd);
		ip--;
		if ((fp = fd_getfile(fdp, fd)) == NULL) {
			error = EBADF;
			goto fail;
		}
		if (fp->f_count >= FDUP_MAX_COUNT) {
			error = EDEADLK;
			goto fail;
		}
		error = pledge_sendfd(p, fp);
		if (error)
			goto fail;

		/* kqueue descriptors cannot be copied */
		if (fp->f_type == DTYPE_KQUEUE) {
			error = EINVAL;
			goto fail;
		}
#if NKCOV > 0
		/* kcov descriptors cannot be copied */
		if (fp->f_type == DTYPE_VNODE && kcov_vnode(fp->f_data)) {
			error = EINVAL;
			goto fail;
		}
#endif
		rp->fp = fp;
		rp->flags = fdp->fd_ofileflags[fd] & UF_PLEDGED;
		rp--;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount++;
			unp->unp_file = fp;
			rw_exit_write(&unp_gc_lock);
		}
	}
	fdpunlock(fdp);
	return (0);
fail:
	fdpunlock(fdp);
	if (fp != NULL)
		FRELE(fp, p);
	/* Back out what we just did. */
	for ( ; i > 0; i--) {
		rp++;
		fp = rp->fp;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount--;
			rw_exit_write(&unp_gc_lock);
		}
		FRELE(fp, p);
	}

nospace:
	mtx_enter(&unp_rights_mtx);
	unp_rights -= nfds;
	mtx_leave(&unp_rights_mtx);

	return (error);
}

void
unp_gc(void *arg __unused)
{
	struct unp_deferral *defer;
	struct file *fp;
	struct socket *so;
	struct unpcb *unp;
	int nunref, i;

	rw_enter_write(&unp_gc_lock);
	if (unp_gcing)
		goto unlock;
	unp_gcing = 1;
	rw_exit_write(&unp_gc_lock);

	rw_enter_write(&unp_df_lock);
	/* close any fds on the deferred list */
	while ((defer = SLIST_FIRST(&unp_deferred)) != NULL) {
		SLIST_REMOVE_HEAD(&unp_deferred, ud_link);
		rw_exit_write(&unp_df_lock);
		for (i = 0; i < defer->ud_n; i++) {
			fp = defer->ud_fp[i].fp;
			if (fp == NULL)
				continue;
			if ((unp = fptounp(fp)) != NULL) {
				rw_enter_write(&unp_gc_lock);
				unp->unp_msgcount--;
				rw_exit_write(&unp_gc_lock);
			}
			mtx_enter(&unp_rights_mtx);
			unp_rights--;
			mtx_leave(&unp_rights_mtx);
			/* closef() expects a refcount of 2 */
			FREF(fp);
			(void) closef(fp, NULL);
		}
		free(defer, M_TEMP, sizeof(*defer) +
		    sizeof(struct fdpass) * defer->ud_n);
		rw_enter_write(&unp_df_lock);
	}
	rw_exit_write(&unp_df_lock);

	nunref = 0;

	rw_enter_write(&unp_gc_lock);

	/*
	 * Determine sockets which may be prospectively dead. Such
	 * sockets have their `unp_msgcount' equal to the `f_count'.
	 * If `unp_msgcount' is 0, the socket has not been passed
	 * and can't be unreferenced.
	 */
	LIST_FOREACH(unp, &unp_head, unp_link) {
		unp->unp_gcflags = 0;

		if (unp->unp_msgcount == 0)
			continue;
		if ((fp = unp->unp_file) == NULL)
			continue;
		if (fp->f_count == unp->unp_msgcount) {
			unp->unp_gcflags |= UNP_GCDEAD;
			unp->unp_gcrefs = unp->unp_msgcount;
			nunref++;
		}
	}

	/*
	 * Scan all sockets previously marked as dead. Remove
	 * the `unp_gcrefs' reference each socket holds on any
	 * dead socket in its buffer.
	 */
	LIST_FOREACH(unp, &unp_head, unp_link) {
		if ((unp->unp_gcflags & UNP_GCDEAD) == 0)
			continue;
		so = unp->unp_socket;
		mtx_enter(&so->so_rcv.sb_mtx);
		unp_scan(so->so_rcv.sb_mb, unp_remove_gcrefs);
		mtx_leave(&so->so_rcv.sb_mtx);
	}

	/*
	 * If the dead socket has `unp_gcrefs' reference counter
	 * greater than 0, it can't be unreferenced. Mark it as
	 * alive and increment the `unp_gcrefs' reference for each
	 * dead socket within its buffer. Repeat this until we
	 * have no new alive sockets found.
	 */
	do {
		unp_defer = 0;

		LIST_FOREACH(unp, &unp_head, unp_link) {
			if ((unp->unp_gcflags & UNP_GCDEAD) == 0)
				continue;
			if (unp->unp_gcrefs == 0)
				continue;

			unp->unp_gcflags &= ~UNP_GCDEAD;

			so = unp->unp_socket;
			mtx_enter(&so->so_rcv.sb_mtx);
			unp_scan(so->so_rcv.sb_mb, unp_restore_gcrefs);
			mtx_leave(&so->so_rcv.sb_mtx);

			KASSERT(nunref > 0);
			nunref--;
		}
	} while (unp_defer > 0);

	/*
	 * If there are any unreferenced sockets, then for each dispose
	 * of files in its receive buffer and then close it.
	 */
	if (nunref) {
		LIST_FOREACH(unp, &unp_head, unp_link) {
			if (unp->unp_gcflags & UNP_GCDEAD) {
				/*
				 * This socket could still be connected
				 * and if so its `so_rcv' is still
				 * accessible by a concurrent PRU_SEND
				 * thread.
				 */
				so = unp->unp_socket;
				solock(so);
				sorflush(so);
				sounlock(so);
			}
		}
	}

	unp_gcing = 0;
unlock:
	rw_exit_write(&unp_gc_lock);
}

void
unp_dispose(struct mbuf *m)
{

	if (m)
		unp_scan(m, unp_discard);
}

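/*
 * Walk the control messages of every record in the mbuf chain `m0'
 * and apply `op' to each set of passed file descriptors found in an
 * SCM_RIGHTS control message.
 */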
void
unp_scan(struct mbuf *m0, void (*op)(struct fdpass *, int))
{
	struct mbuf *m;
	struct fdpass *rp;
	struct cmsghdr *cm;
	int qfds;

	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type == MT_CONTROL &&
			    m->m_len >= sizeof(*cm)) {
				cm = mtod(m, struct cmsghdr *);
				if (cm->cmsg_level != SOL_SOCKET ||
				    cm->cmsg_type != SCM_RIGHTS)
					continue;
				qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm))
				    / sizeof(struct fdpass);
				if (qfds > 0) {
					rp = (struct fdpass *)CMSG_DATA(cm);
					op(rp, qfds);
				}
				break;		/* XXX, but saves time */
			}
		}
		m0 = m0->m_nextpkt;
	}
}

void
unp_discard(struct fdpass *rp, int nfds)
{
	struct unp_deferral *defer;

	/* copy the file pointers to a deferral structure */
	defer = malloc(sizeof(*defer) + sizeof(*rp) * nfds, M_TEMP, M_WAITOK);
	defer->ud_n = nfds;
	memcpy(&defer->ud_fp[0], rp, sizeof(*rp) * nfds);
	memset(rp, 0, sizeof(*rp) * nfds);

	rw_enter_write(&unp_df_lock);
	SLIST_INSERT_HEAD(&unp_deferred, defer, ud_link);
	rw_exit_write(&unp_df_lock);

	task_add(systqmp, &unp_gc_task);
}

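/*
 * GC helper: drop one `unp_gcrefs' reference for every possibly dead
 * socket found among the passed descriptors.
 */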
void
unp_remove_gcrefs(struct fdpass *rp, int nfds)
{
	struct unpcb *unp;
	int i;

	rw_assert_wrlock(&unp_gc_lock);

	for (i = 0; i < nfds; i++) {
		if (rp[i].fp == NULL)
			continue;
		if ((unp = fptounp(rp[i].fp)) == NULL)
			continue;
		if (unp->unp_gcflags & UNP_GCDEAD) {
			KASSERT(unp->unp_gcrefs > 0);
			unp->unp_gcrefs--;
		}
	}
}

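/*
 * GC helper: re-add one `unp_gcrefs' reference for every possibly dead
 * socket found among the passed descriptors, and bump `unp_defer' so
 * that unp_gc() performs another marking pass.
 */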
void
unp_restore_gcrefs(struct fdpass *rp, int nfds)
{
	struct unpcb *unp;
	int i;

	rw_assert_wrlock(&unp_gc_lock);

	for (i = 0; i < nfds; i++) {
		if (rp[i].fp == NULL)
			continue;
		if ((unp = fptounp(rp[i].fp)) == NULL)
			continue;
		if (unp->unp_gcflags & UNP_GCDEAD) {
			unp->unp_gcrefs++;
			unp_defer++;
		}
	}
}

int
unp_nam2sun(struct mbuf *nam, struct sockaddr_un **sun, size_t *pathlen)
{
	struct sockaddr *sa = mtod(nam, struct sockaddr *);
	size_t size, len;

	if (nam->m_len < offsetof(struct sockaddr, sa_data))
		return EINVAL;
	if (sa->sa_family != AF_UNIX)
		return EAFNOSUPPORT;
	if (sa->sa_len != nam->m_len)
		return EINVAL;
	if (sa->sa_len > sizeof(struct sockaddr_un))
		return EINVAL;
	*sun = (struct sockaddr_un *)sa;

	/* ensure that sun_path is NUL terminated and fits */
	size = (*sun)->sun_len - offsetof(struct sockaddr_un, sun_path);
	len = strnlen((*sun)->sun_path, size);
	if (len == sizeof((*sun)->sun_path))
		return EINVAL;
	if (len == size) {
		if (m_trailingspace(nam) == 0)
			return EINVAL;
		nam->m_len++;
		(*sun)->sun_len++;
		(*sun)->sun_path[len] = '\0';
	}
	if (pathlen != NULL)
		*pathlen = len;

	return 0;
}
1593