/*	$OpenBSD: uipc_usrreq.c,v 1.193 2022/11/15 22:47:15 mvs Exp $	*/
/*	$NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/filedesc.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/unpcb.h>
#include <sys/un.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/mbuf.h>
#include <sys/task.h>
#include <sys/pledge.h>
#include <sys/pool.h>
#include <sys/rwlock.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/refcnt.h>

#include "kcov.h"
#if NKCOV > 0
#include <sys/kcov.h>
#endif

/*
 * Locks used to protect global data and struct members:
 *      I       immutable after creation
 *      D       unp_df_lock
 *      G       unp_gc_lock
 *      M       unp_ino_mtx
 *      R       unp_rights_mtx
 *      a       atomic
 *      s       socket lock
 */

struct rwlock unp_lock = RWLOCK_INITIALIZER("unplock");
struct rwlock unp_df_lock = RWLOCK_INITIALIZER("unpdflk");
struct rwlock unp_gc_lock = RWLOCK_INITIALIZER("unpgclk");

struct mutex unp_rights_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
struct mutex unp_ino_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);

/*
 * Stack of sets of files that were passed over a socket but were
 * not received and need to be closed.
 */
struct	unp_deferral {
	SLIST_ENTRY(unp_deferral)	ud_link;	/* [D] */
	int				ud_n;		/* [I] */
	/* followed by ud_n struct fdpass */
	struct fdpass			ud_fp[];	/* [I] */
};

void	uipc_setaddr(const struct unpcb *, struct mbuf *);
void	unp_discard(struct fdpass *, int);
void	unp_remove_gcrefs(struct fdpass *, int);
void	unp_restore_gcrefs(struct fdpass *, int);
void	unp_scan(struct mbuf *, void (*)(struct fdpass *, int));
int	unp_nam2sun(struct mbuf *, struct sockaddr_un **, size_t *);
static inline void unp_ref(struct unpcb *);
static inline void unp_rele(struct unpcb *);
struct socket *unp_solock_peer(struct socket *);

struct pool unpcb_pool;
struct task unp_gc_task = TASK_INITIALIZER(unp_gc, NULL);

/*
 * Unix communications domain.
 *
 * TODO:
 *	RDM
 *	rethink name space problems
 *	need a proper out-of-band
 */
const struct	sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };

/* [G] list of all UNIX domain sockets, for unp_gc() */
LIST_HEAD(unp_head, unpcb)	unp_head =
	LIST_HEAD_INITIALIZER(unp_head);
/* [D] list of sets of files that were sent over sockets that are now closed */
SLIST_HEAD(,unp_deferral)	unp_deferred =
	SLIST_HEAD_INITIALIZER(unp_deferred);

ino_t	unp_ino;	/* [M] prototype for fake inode numbers */
int	unp_rights;	/* [R] file descriptors in flight */
int	unp_defer;	/* [G] number of deferred fp to close by the GC task */
int	unp_gcing;	/* [G] GC task currently running */

const struct pr_usrreqs uipc_usrreqs = {
	.pru_attach	= uipc_attach,
	.pru_detach	= uipc_detach,
	.pru_bind	= uipc_bind,
	.pru_listen	= uipc_listen,
	.pru_connect	= uipc_connect,
	.pru_accept	= uipc_accept,
	.pru_disconnect	= uipc_disconnect,
	.pru_shutdown	= uipc_shutdown,
	.pru_rcvd	= uipc_rcvd,
	.pru_send	= uipc_send,
	.pru_abort	= uipc_abort,
	.pru_sense	= uipc_sense,
	.pru_sockaddr	= uipc_sockaddr,
	.pru_peeraddr	= uipc_peeraddr,
	.pru_connect2	= uipc_connect2,
};

const struct pr_usrreqs uipc_dgram_usrreqs = {
	.pru_attach	= uipc_attach,
	.pru_detach	= uipc_detach,
	.pru_bind	= uipc_bind,
	.pru_listen	= uipc_listen,
	.pru_connect	= uipc_connect,
	.pru_disconnect	= uipc_disconnect,
	.pru_shutdown	= uipc_dgram_shutdown,
	.pru_send	= uipc_dgram_send,
	.pru_sense	= uipc_sense,
	.pru_sockaddr	= uipc_sockaddr,
	.pru_peeraddr	= uipc_peeraddr,
	.pru_connect2	= uipc_connect2,
};

void
unp_init(void)
{
	pool_init(&unpcb_pool, sizeof(struct unpcb), 0,
	    IPL_SOFTNET, 0, "unpcb", NULL);
}

static inline void
unp_ref(struct unpcb *unp)
{
	refcnt_take(&unp->unp_refcnt);
}

static inline void
unp_rele(struct unpcb *unp)
{
	refcnt_rele_wake(&unp->unp_refcnt);
}
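
/*
 * Lock the peer of an already locked socket.  To keep two threads
 * locking the same pair in opposite order from deadlocking, sockets
 * are always locked in ascending address order: if the peer sorts
 * higher it is locked directly, otherwise `so' is dropped and both
 * sockets are reacquired in order, revalidating the connection
 * afterwards since it may have changed while unlocked.
 */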

struct socket *
unp_solock_peer(struct socket *so)
{
	struct unpcb *unp, *unp2;
	struct socket *so2;

	unp = so->so_pcb;

again:
	if ((unp2 = unp->unp_conn) == NULL)
		return NULL;

	so2 = unp2->unp_socket;

	if (so < so2)
		solock(so2);
	else if (so > so2) {
		unp_ref(unp2);
		sounlock(so);
		solock(so2);
		solock(so);

		/* Datagram socket could be reconnected due to re-lock. */
		if (unp->unp_conn != unp2) {
			sounlock(so2);
			unp_rele(unp2);
			goto again;
		}

		unp_rele(unp2);
	}

	return so2;
}

void
uipc_setaddr(const struct unpcb *unp, struct mbuf *nam)
{
	if (unp != NULL && unp->unp_addr != NULL) {
		nam->m_len = unp->unp_addr->m_len;
		memcpy(mtod(nam, caddr_t), mtod(unp->unp_addr, caddr_t),
		    nam->m_len);
	} else {
		nam->m_len = sizeof(sun_noname);
		memcpy(mtod(nam, struct sockaddr *), &sun_noname,
		    nam->m_len);
	}
}

/*
 * Stream sockets reserve PIPSIZ bytes for both the send and the receive
 * buffer, although the effective total for a connected pair is only
 * PIPSIZ, since data is queued directly on the peer's receive buffer.
 * Datagram sockets use the sendspace only as the maximum datagram size;
 * no send buffer is actually reserved.  Their recvspace should be large
 * enough for at least one max-size datagram plus address.
 */
#define	PIPSIZ	8192
u_int	unpst_sendspace = PIPSIZ;
u_int	unpst_recvspace = PIPSIZ;
u_int	unpsq_sendspace = PIPSIZ;
u_int	unpsq_recvspace = PIPSIZ;
u_int	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_int	unpdg_recvspace = 16*1024;

const struct sysctl_bounded_args unpstctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpst_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpst_sendspace, 0, SB_MAX },
};
const struct sysctl_bounded_args unpsqctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpsq_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpsq_sendspace, 0, SB_MAX },
};
const struct sysctl_bounded_args unpdgctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpdg_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpdg_sendspace, 0, SB_MAX },
};
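
/*
 * These limits are exported through the net.unix.{stream,seqpacket,
 * dgram} sysctl subtree handled by uipc_sysctl() below.  A minimal
 * userland sketch of reading one of them (illustrative, not part of
 * this file; assumes UNPCTL_RECVSPACE is visible via <sys/unpcb.h>):
 *
 *	#include <sys/types.h>
 *	#include <sys/socket.h>
 *	#include <sys/sysctl.h>
 *	#include <sys/unpcb.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int mib[4] = { CTL_NET, PF_UNIX, SOCK_STREAM,
 *		    UNPCTL_RECVSPACE };
 *		u_int space;
 *		size_t len = sizeof(space);
 *
 *		if (sysctl(mib, 4, &space, &len, NULL, 0) == -1)
 *			return 1;
 *		printf("net.unix.stream.recvspace: %u\n", space);
 *		return 0;
 *	}
 */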

int
uipc_attach(struct socket *so, int proto, int wait)
{
	struct unpcb *unp;
	int error;

	if (so->so_pcb)
		return EISCONN;
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {

		case SOCK_STREAM:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_SEQPACKET:
			error = soreserve(so, unpsq_sendspace, unpsq_recvspace);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			return (error);
	}
	unp = pool_get(&unpcb_pool, (wait == M_WAIT ? PR_WAITOK : PR_NOWAIT) |
	    PR_ZERO);
	if (unp == NULL)
		return (ENOBUFS);
	refcnt_init(&unp->unp_refcnt);
	unp->unp_socket = so;
	so->so_pcb = unp;
	getnanotime(&unp->unp_ctime);

	/*
	 * Enforce `unp_gc_lock' -> `solock()' lock order.
	 */
	sounlock(so);
	rw_enter_write(&unp_gc_lock);
	LIST_INSERT_HEAD(&unp_head, unp, unp_link);
	rw_exit_write(&unp_gc_lock);
	solock(so);
	return (0);
}

int
uipc_detach(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == NULL)
		return (EINVAL);

	unp_detach(unp);

	return (0);
}

int
uipc_bind(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct unpcb *unp = sotounpcb(so);

	return unp_bind(unp, nam, p);
}

int
uipc_listen(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp->unp_vnode == NULL)
		return (EINVAL);
	return (0);
}

int
uipc_connect(struct socket *so, struct mbuf *nam)
{
	return unp_connect(so, nam, curproc);
}

int
uipc_accept(struct socket *so, struct mbuf *nam)
{
	struct socket *so2;
	struct unpcb *unp = sotounpcb(so);

	/*
	 * Pass back name of connected socket, if it was bound and
	 * we are still connected (our peer may have closed already!).
	 */
	so2 = unp_solock_peer(so);
	uipc_setaddr(unp->unp_conn, nam);

	if (so2 != NULL && so2 != so)
		sounlock(so2);
	return (0);
}

int
uipc_disconnect(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	unp_disconnect(unp);
	return (0);
}

int
uipc_shutdown(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	socantsendmore(so);

	if ((so2 = unp_solock_peer(unp->unp_socket))) {
		socantrcvmore(so2);
		sounlock(so2);
	}

	return (0);
}

int
uipc_dgram_shutdown(struct socket *so)
{
	socantsendmore(so);
	return (0);
}

void
uipc_rcvd(struct socket *so)
{
	struct socket *so2;

	if ((so2 = unp_solock_peer(so)) == NULL)
		return;
	/*
	 * Adjust backpressure on sender
	 * and wake up any processes waiting to write.
	 */
	so2->so_snd.sb_mbcnt = so->so_rcv.sb_mbcnt;
	so2->so_snd.sb_cc = so->so_rcv.sb_cc;
	sowwakeup(so2);
	sounlock(so2);
}

int
uipc_send(struct socket *so, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control)
{
	struct socket *so2;
	int error = 0;

	if (control) {
		sounlock(so);
		error = unp_internalize(control, curproc);
		solock(so);
		if (error)
			goto out;
	}

	if (so->so_state & SS_CANTSENDMORE) {
		error = EPIPE;
		goto dispose;
	}
	if ((so2 = unp_solock_peer(so)) == NULL) {
		error = ENOTCONN;
		goto dispose;
	}

	/*
	 * Send to paired receive port, and then raise
	 * send buffer counts to maintain backpressure.
	 * Wake up readers.
	 */
	if (control) {
		if (sbappendcontrol(so2, &so2->so_rcv, m, control)) {
			control = NULL;
		} else {
			sounlock(so2);
			error = ENOBUFS;
			goto dispose;
		}
	} else if (so->so_type == SOCK_SEQPACKET)
		sbappendrecord(so2, &so2->so_rcv, m);
	else
		sbappend(so2, &so2->so_rcv, m);
	so->so_snd.sb_mbcnt = so2->so_rcv.sb_mbcnt;
	so->so_snd.sb_cc = so2->so_rcv.sb_cc;
	if (so2->so_rcv.sb_cc > 0)
		sorwakeup(so2);

	sounlock(so2);
	m = NULL;

dispose:
	/* we need to undo unp_internalize in case of errors */
	if (control && error)
		unp_dispose(control);

out:
	m_freem(control);
	m_freem(m);

	return (error);
}

int
uipc_dgram_send(struct socket *so, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	const struct sockaddr *from;
	int error = 0;

	if (control) {
		sounlock(so);
		error = unp_internalize(control, curproc);
		solock(so);
		if (error)
			goto out;
	}

	if (nam) {
		if (unp->unp_conn) {
			error = EISCONN;
			goto dispose;
		}
		error = unp_connect(so, nam, curproc);
		if (error)
			goto dispose;
	}

	if ((so2 = unp_solock_peer(so)) == NULL) {
		if (nam != NULL)
			error = ECONNREFUSED;
		else
			error = ENOTCONN;
		goto dispose;
	}

	if (unp->unp_addr)
		from = mtod(unp->unp_addr, struct sockaddr *);
	else
		from = &sun_noname;
	if (sbappendaddr(so2, &so2->so_rcv, from, m, control)) {
		sorwakeup(so2);
		m = NULL;
		control = NULL;
	} else
		error = ENOBUFS;

	if (so2 != so)
		sounlock(so2);

	if (nam)
		unp_disconnect(unp);

dispose:
	/* we need to undo unp_internalize in case of errors */
	if (control && error)
		unp_dispose(control);

out:
	m_freem(control);
	m_freem(m);

	return (error);
}

void
uipc_abort(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	unp_detach(unp);
	sofree(so, 0);
}

int
uipc_sense(struct socket *so, struct stat *sb)
{
	struct unpcb *unp = sotounpcb(so);

	sb->st_blksize = so->so_snd.sb_hiwat;
	sb->st_dev = NODEV;
	mtx_enter(&unp_ino_mtx);
	if (unp->unp_ino == 0)
		unp->unp_ino = unp_ino++;
	mtx_leave(&unp_ino_mtx);
	sb->st_atim.tv_sec =
	    sb->st_mtim.tv_sec =
	    sb->st_ctim.tv_sec = unp->unp_ctime.tv_sec;
	sb->st_atim.tv_nsec =
	    sb->st_mtim.tv_nsec =
	    sb->st_ctim.tv_nsec = unp->unp_ctime.tv_nsec;
	sb->st_ino = unp->unp_ino;

	return (0);
}

int
uipc_sockaddr(struct socket *so, struct mbuf *nam)
{
	struct unpcb *unp = sotounpcb(so);

	uipc_setaddr(unp, nam);
	return (0);
}

int
uipc_peeraddr(struct socket *so, struct mbuf *nam)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	so2 = unp_solock_peer(so);
	uipc_setaddr(unp->unp_conn, nam);
	if (so2 != NULL && so2 != so)
		sounlock(so2);
	return (0);
}

int
uipc_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so), *unp2;
	int error;

	if ((error = unp_connect2(so, so2)))
		return (error);

	unp->unp_connid.uid = curproc->p_ucred->cr_uid;
	unp->unp_connid.gid = curproc->p_ucred->cr_gid;
	unp->unp_connid.pid = curproc->p_p->ps_pid;
	unp->unp_flags |= UNP_FEIDS;
	unp2 = sotounpcb(so2);
	unp2->unp_connid.uid = curproc->p_ucred->cr_uid;
	unp2->unp_connid.gid = curproc->p_ucred->cr_gid;
	unp2->unp_connid.pid = curproc->p_p->ps_pid;
	unp2->unp_flags |= UNP_FEIDS;

	return (0);
}

int
uipc_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int *valp = &unp_defer;

	/* All sysctl names at this level are terminal. */
	switch (name[0]) {
	case SOCK_STREAM:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpstctl_vars, nitems(unpstctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case SOCK_SEQPACKET:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpsqctl_vars, nitems(unpsqctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case SOCK_DGRAM:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpdgctl_vars, nitems(unpdgctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case NET_UNIX_INFLIGHT:
		valp = &unp_rights;
		/* FALLTHROUGH */
	case NET_UNIX_DEFERRED:
		if (namelen != 1)
			return (ENOTDIR);
		return sysctl_rdint(oldp, oldlenp, newp, *valp);
	default:
		return (ENOPROTOOPT);
	}
}

void
unp_detach(struct unpcb *unp)
{
	struct socket *so = unp->unp_socket;
	struct vnode *vp = unp->unp_vnode;
	struct unpcb *unp2;

	unp->unp_vnode = NULL;

	/*
	 * Enforce `unp_gc_lock' -> `solock()' lock order.
	 * Enforce `i_lock' -> `solock()' lock order.
	 */
	sounlock(so);

	rw_enter_write(&unp_gc_lock);
	LIST_REMOVE(unp, unp_link);
	rw_exit_write(&unp_gc_lock);

	if (vp != NULL) {
		VOP_LOCK(vp, LK_EXCLUSIVE);
		vp->v_socket = NULL;

		KERNEL_LOCK();
		vput(vp);
		KERNEL_UNLOCK();
	}

	solock(so);

	if (unp->unp_conn != NULL) {
		/*
		 * A datagram socket could be connected to itself.
		 * Such a socket is disconnected here.
		 */
		unp_disconnect(unp);
	}

	while ((unp2 = SLIST_FIRST(&unp->unp_refs)) != NULL) {
		struct socket *so2 = unp2->unp_socket;

		if (so < so2)
			solock(so2);
		else {
			unp_ref(unp2);
			sounlock(so);
			solock(so2);
			solock(so);

			if (unp2->unp_conn != unp) {
				/* `unp2' was disconnected due to re-lock. */
				sounlock(so2);
				unp_rele(unp2);
				continue;
			}

			unp_rele(unp2);
		}

		unp2->unp_conn = NULL;
		SLIST_REMOVE(&unp->unp_refs, unp2, unpcb, unp_nextref);
		so2->so_error = ECONNRESET;
		so2->so_state &= ~SS_ISCONNECTED;

		sounlock(so2);
	}

	sounlock(so);
	refcnt_finalize(&unp->unp_refcnt, "unpfinal");
	solock(so);

	soisdisconnected(so);
	so->so_pcb = NULL;
	m_freem(unp->unp_addr);
	pool_put(&unpcb_pool, unp);
	if (unp_rights)
		task_add(systqmp, &unp_gc_task);
}

int
unp_bind(struct unpcb *unp, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct mbuf *nam2;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	size_t pathlen;

	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EINVAL);
	if (unp->unp_vnode != NULL)
		return (EINVAL);
	if ((error = unp_nam2sun(nam, &soun, &pathlen)))
		return (error);

	unp->unp_flags |= UNP_BINDING;

	/*
	 * Enforce `i_lock' -> `unplock' because fifo subsystem
	 * requires it. The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(unp->unp_socket);

	nam2 = m_getclr(M_WAITOK, MT_SONAME);
	nam2->m_len = sizeof(struct sockaddr_un);
	memcpy(mtod(nam2, struct sockaddr_un *), soun,
	    offsetof(struct sockaddr_un, sun_path) + pathlen);
	/* No need to NUL terminate: m_getclr() returns zero'd mbufs. */

	soun = mtod(nam2, struct sockaddr_un *);

	/* Fixup sun_len to keep it in sync with m_len. */
	soun->sun_len = nam2->m_len;

	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;
	nd.ni_unveil = UNVEIL_CREATE;

	KERNEL_LOCK();
/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error != 0) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	vp = nd.ni_vp;
	if (vp != NULL) {
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		m_freem(nam2);
		error = EADDRINUSE;
		solock(unp->unp_socket);
		goto out;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	vput(nd.ni_dvp);
	if (error) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	solock(unp->unp_socket);
	unp->unp_addr = nam2;
	vp = nd.ni_vp;
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_connid.uid = p->p_ucred->cr_uid;
	unp->unp_connid.gid = p->p_ucred->cr_gid;
	unp->unp_connid.pid = p->p_p->ps_pid;
	unp->unp_flags |= UNP_FEIDSBIND;
	VOP_UNLOCK(vp);
out:
	KERNEL_UNLOCK();
	unp->unp_flags &= ~UNP_BINDING;

	return (error);
}

int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	struct nameidata nd;
	int error;

	unp = sotounpcb(so);
	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EISCONN);
	if ((error = unp_nam2sun(nam, &soun, NULL)))
		return (error);

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;
	nd.ni_unveil = UNVEIL_WRITE;

	unp->unp_flags |= UNP_CONNECTING;

	/*
	 * Enforce `i_lock' -> `unplock' because fifo subsystem
	 * requires it. The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(so);

	KERNEL_LOCK();
	error = namei(&nd);
	if (error != 0)
		goto unlock;
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto put;
	}
	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
		goto put;
	so2 = vp->v_socket;
	if (so2 == NULL) {
		error = ECONNREFUSED;
		goto put;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto put;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		solock(so2);

		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0, M_WAIT)) == NULL) {
			error = ECONNREFUSED;
		}

		sounlock(so2);

		if (error != 0)
			goto put;

		/*
		 * Since `so2' is protected by vnode(9) lock, `so3'
		 * can't be PRU_ABORT'ed here.
		 */
		solock_pair(so, so3);

		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);

		/*
		 * `unp_addr', `unp_connid' and 'UNP_FEIDSBIND' flag
		 * are immutable since we set them in unp_bind().
		 */
		if (unp2->unp_addr)
			unp3->unp_addr =
			    m_copym(unp2->unp_addr, 0, M_COPYALL, M_NOWAIT);
		unp3->unp_connid.uid = p->p_ucred->cr_uid;
		unp3->unp_connid.gid = p->p_ucred->cr_gid;
		unp3->unp_connid.pid = p->p_p->ps_pid;
		unp3->unp_flags |= UNP_FEIDS;

		if (unp2->unp_flags & UNP_FEIDSBIND) {
			unp->unp_connid = unp2->unp_connid;
			unp->unp_flags |= UNP_FEIDS;
		}

		so2 = so3;
	} else {
		if (so2 != so)
			solock_pair(so, so2);
		else
			solock(so);
	}

	error = unp_connect2(so, so2);

	sounlock(so);

	/*
	 * `so2' can't be PRU_ABORT'ed concurrently
	 */
	if (so2 != so)
		sounlock(so2);
put:
	vput(vp);
unlock:
	KERNEL_UNLOCK();
	solock(so);
	unp->unp_flags &= ~UNP_CONNECTING;

	/*
	 * The peer socket could have been closed by a concurrent
	 * thread while `so' and `vp' were unlocked.
	 */
	if (error == 0 && unp->unp_conn == NULL)
		error = ECONNREFUSED;

	return (error);
}

int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;

	soassertlocked(so);
	soassertlocked(so2);

	if (so2->so_type != so->so_type)
		return (EPROTOTYPE);
	unp2 = sotounpcb(so2);
	unp->unp_conn = unp2;
	switch (so->so_type) {

	case SOCK_DGRAM:
		SLIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_nextref);
		soisconnected(so);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp2->unp_conn = unp;
		soisconnected(so);
		soisconnected(so2);
		break;

	default:
		panic("unp_connect2");
	}
	return (0);
}

void
unp_disconnect(struct unpcb *unp)
{
	struct socket *so2;
	struct unpcb *unp2;

	if ((so2 = unp_solock_peer(unp->unp_socket)) == NULL)
		return;

	unp2 = unp->unp_conn;
	unp->unp_conn = NULL;

	switch (unp->unp_socket->so_type) {

	case SOCK_DGRAM:
		SLIST_REMOVE(&unp2->unp_refs, unp, unpcb, unp_nextref);
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp->unp_socket->so_snd.sb_mbcnt = 0;
		unp->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp->unp_socket);
		unp2->unp_conn = NULL;
		unp2->unp_socket->so_snd.sb_mbcnt = 0;
		unp2->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp2->unp_socket);
		break;
	}

	if (so2 != unp->unp_socket)
		sounlock(so2);
}

static struct unpcb *
fptounp(struct file *fp)
{
	struct socket *so;

	if (fp->f_type != DTYPE_SOCKET)
		return (NULL);
	if ((so = fp->f_data) == NULL)
		return (NULL);
	if (so->so_proto->pr_domain != &unixdomain)
		return (NULL);
	return (sotounpcb(so));
}
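
/*
 * unp_internalize() below converts the int file descriptors of an
 * SCM_RIGHTS control message into struct fdpass records holding
 * struct file pointers; unp_externalize() performs the reverse
 * translation in the receiving process.  A minimal userland sketch
 * of the send side (illustrative, not part of this file):
 *
 *	#include <sys/socket.h>
 *	#include <string.h>
 *
 *	int
 *	send_fd(int sock, int fd)
 *	{
 *		struct msghdr msg;
 *		struct cmsghdr *cmsg;
 *		struct iovec iov;
 *		char c = 'F';
 *		union {
 *			struct cmsghdr hdr;
 *			unsigned char buf[CMSG_SPACE(sizeof(int))];
 *		} cmsgbuf;
 *
 *		memset(&msg, 0, sizeof(msg));
 *		iov.iov_base = &c;
 *		iov.iov_len = 1;
 *		msg.msg_iov = &iov;
 *		msg.msg_iovlen = 1;
 *		msg.msg_control = &cmsgbuf.buf;
 *		msg.msg_controllen = sizeof(cmsgbuf.buf);
 *		cmsg = CMSG_FIRSTHDR(&msg);
 *		cmsg->cmsg_len = CMSG_LEN(sizeof(int));
 *		cmsg->cmsg_level = SOL_SOCKET;
 *		cmsg->cmsg_type = SCM_RIGHTS;
 *		memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
 *		return (sendmsg(sock, &msg, 0) == -1 ? -1 : 0);
 *	}
 */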

int
unp_externalize(struct mbuf *rights, socklen_t controllen, int flags)
{
	struct proc *p = curproc;		/* XXX */
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	struct filedesc *fdp = p->p_fd;
	int i, *fds = NULL;
	struct fdpass *rp;
	struct file *fp;
	int nfds, error = 0;

	/*
	 * This code only works because SCM_RIGHTS is the only supported
	 * control message type on unix sockets. Enforce this here.
	 */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET)
		return EINVAL;

	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
	    sizeof(struct fdpass);
	if (controllen < CMSG_ALIGN(sizeof(struct cmsghdr)))
		controllen = 0;
	else
		controllen -= CMSG_ALIGN(sizeof(struct cmsghdr));
	if (nfds > controllen / sizeof(int)) {
		error = EMSGSIZE;
		goto out;
	}
	/* Make sure the recipient is allowed to see the descriptors. */
	rp = (struct fdpass *)CMSG_DATA(cm);

	/* fdp->fd_rdir requires KERNEL_LOCK() */
	KERNEL_LOCK();

	for (i = 0; i < nfds; i++) {
		fp = rp->fp;
		rp++;
		error = pledge_recvfd(p, fp);
		if (error)
			break;

		/*
		 * Block devices are not allowed.  If passing a
		 * directory, make sure that it is underneath the root.
		 */
		if (fdp->fd_rdir != NULL && fp->f_type == DTYPE_VNODE) {
			struct vnode *vp = (struct vnode *)fp->f_data;

			if (vp->v_type == VBLK ||
			    (vp->v_type == VDIR &&
			    !vn_isunder(vp, fdp->fd_rdir, p))) {
				error = EPERM;
				break;
			}
		}
	}

	KERNEL_UNLOCK();

	if (error)
		goto out;

	fds = mallocarray(nfds, sizeof(int), M_TEMP, M_WAITOK);

	fdplock(fdp);
restart:
	/*
	 * First loop -- allocate file descriptor table slots for the
	 * new descriptors.
	 */
	rp = ((struct fdpass *)CMSG_DATA(cm));
	for (i = 0; i < nfds; i++) {
		if ((error = fdalloc(p, 0, &fds[i])) != 0) {
			/*
			 * Back out what we've done so far.
			 */
			for (--i; i >= 0; i--)
				fdremove(fdp, fds[i]);

			if (error == ENOSPC) {
				fdexpand(p);
				goto restart;
			}

			fdpunlock(fdp);

			/*
			 * This is the error that has historically
			 * been returned, and some callers may
			 * expect it.
			 */

			error = EMSGSIZE;
			goto out;
		}

		/*
		 * Make the slot reference the descriptor so that
		 * fdalloc() works properly.  We finalize it all
		 * in the loop below.
		 */
		mtx_enter(&fdp->fd_fplock);
		KASSERT(fdp->fd_ofiles[fds[i]] == NULL);
		fdp->fd_ofiles[fds[i]] = rp->fp;
		mtx_leave(&fdp->fd_fplock);

		fdp->fd_ofileflags[fds[i]] = (rp->flags & UF_PLEDGED);
		if (flags & MSG_CMSG_CLOEXEC)
			fdp->fd_ofileflags[fds[i]] |= UF_EXCLOSE;

		rp++;
	}

	/*
	 * Keep `fdp' locked to prevent concurrent close() of just
	 * inserted descriptors. Such descriptors could have the only
	 * `f_count' reference which is now shared between control
	 * message and `fdp'.
	 */

	/*
	 * Now that adding them has succeeded, update all of the
	 * descriptor passing state.
	 */
	rp = (struct fdpass *)CMSG_DATA(cm);

	for (i = 0; i < nfds; i++) {
		struct unpcb *unp;

		fp = rp->fp;
		rp++;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount--;
			rw_exit_write(&unp_gc_lock);
		}
	}
	fdpunlock(fdp);

	mtx_enter(&unp_rights_mtx);
	unp_rights -= nfds;
	mtx_leave(&unp_rights_mtx);

	/*
	 * Copy the temporary array into the message and shrink its
	 * length: the ints are smaller than the struct file pointers
	 * they replace.
	 */
	memcpy(CMSG_DATA(cm), fds, nfds * sizeof(int));
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
	rights->m_len = CMSG_LEN(nfds * sizeof(int));
 out:
	if (fds != NULL)
		free(fds, M_TEMP, nfds * sizeof(int));

	if (error) {
		if (nfds > 0) {
			/*
			 * No lock required. We are the only `cm' holder.
			 */
			rp = ((struct fdpass *)CMSG_DATA(cm));
			unp_discard(rp, nfds);
		}
	}

	return (error);
}

int
unp_internalize(struct mbuf *control, struct proc *p)
{
	struct filedesc *fdp = p->p_fd;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct fdpass *rp;
	struct file *fp;
	struct unpcb *unp;
	int i, error;
	int nfds, *ip, fd, neededspace;

	/*
	 * Check for two potential msg_controllen values because
	 * IETF stuck their nose in a place it does not belong.
	 */
	if (control->m_len < CMSG_LEN(0) || cm->cmsg_len < CMSG_LEN(0))
		return (EINVAL);
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    !(cm->cmsg_len == control->m_len ||
	    control->m_len == CMSG_ALIGN(cm->cmsg_len)))
		return (EINVAL);
	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int);

	mtx_enter(&unp_rights_mtx);
	if (unp_rights + nfds > maxfiles / 10) {
		mtx_leave(&unp_rights_mtx);
		return (EMFILE);
	}
	unp_rights += nfds;
	mtx_leave(&unp_rights_mtx);

	/* Make sure we have room for the struct file pointers */
morespace:
	neededspace = CMSG_SPACE(nfds * sizeof(struct fdpass)) -
	    control->m_len;
	if (neededspace > m_trailingspace(control)) {
		char *tmp;
		/* if we already have a cluster, the message is just too big */
		if (control->m_flags & M_EXT) {
			error = E2BIG;
			goto nospace;
		}

		/* copy cmsg data temporarily out of the mbuf */
		tmp = malloc(control->m_len, M_TEMP, M_WAITOK);
		memcpy(tmp, mtod(control, caddr_t), control->m_len);

		/* allocate a cluster and try again */
		MCLGET(control, M_WAIT);
		if ((control->m_flags & M_EXT) == 0) {
			free(tmp, M_TEMP, control->m_len);
			error = ENOBUFS;	/* allocation failed */
			goto nospace;
		}

		/* copy the data back into the cluster */
		cm = mtod(control, struct cmsghdr *);
		memcpy(cm, tmp, control->m_len);
		free(tmp, M_TEMP, control->m_len);
		goto morespace;
	}

	/* adjust message & mbuf to note amount of space actually used. */
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct fdpass));
	control->m_len = CMSG_SPACE(nfds * sizeof(struct fdpass));

	ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
	rp = ((struct fdpass *)CMSG_DATA(cm)) + nfds - 1;
	fdplock(fdp);
	for (i = 0; i < nfds; i++) {
		memcpy(&fd, ip, sizeof fd);
		ip--;
		if ((fp = fd_getfile(fdp, fd)) == NULL) {
			error = EBADF;
			goto fail;
		}
		if (fp->f_count >= FDUP_MAX_COUNT) {
			error = EDEADLK;
			goto fail;
		}
		error = pledge_sendfd(p, fp);
		if (error)
			goto fail;

		/* kqueue descriptors cannot be copied */
		if (fp->f_type == DTYPE_KQUEUE) {
			error = EINVAL;
			goto fail;
		}
#if NKCOV > 0
		/* kcov descriptors cannot be copied */
		if (fp->f_type == DTYPE_VNODE && kcov_vnode(fp->f_data)) {
			error = EINVAL;
			goto fail;
		}
#endif
		rp->fp = fp;
		rp->flags = fdp->fd_ofileflags[fd] & UF_PLEDGED;
		rp--;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount++;
			unp->unp_file = fp;
			rw_exit_write(&unp_gc_lock);
		}
	}
	fdpunlock(fdp);
	return (0);
fail:
	fdpunlock(fdp);
	if (fp != NULL)
		FRELE(fp, p);
	/* Back out what we just did. */
	for ( ; i > 0; i--) {
		rp++;
		fp = rp->fp;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount--;
			rw_exit_write(&unp_gc_lock);
		}
		FRELE(fp, p);
	}

nospace:
	mtx_enter(&unp_rights_mtx);
	unp_rights -= nfds;
	mtx_leave(&unp_rights_mtx);

	return (error);
}
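
/*
 * Garbage collection for descriptors in flight.  Because a unix
 * socket can itself be passed over a unix socket, the `f_count'
 * references held by queued SCM_RIGHTS messages can form cycles
 * that close() alone never resolves.  Using a send_fd() helper
 * like the sketch above unp_externalize(), the classic leak looks
 * like this (illustrative, not part of this file):
 *
 *	int sv[2];
 *
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *	send_fd(sv[0], sv[1]);
 *	send_fd(sv[1], sv[0]);
 *	close(sv[0]);
 *	close(sv[1]);
 *
 * After both close() calls the pair is unreachable, but each file
 * is still referenced by a message sitting in a receive buffer, so
 * neither `f_count' ever drops to zero.  unp_gc() detects sockets
 * whose only remaining references come from in-flight messages and
 * discards the files queued in their buffers.
 */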

void
unp_gc(void *arg __unused)
{
	struct unp_deferral *defer;
	struct file *fp;
	struct socket *so;
	struct unpcb *unp;
	int nunref, i;

	rw_enter_write(&unp_gc_lock);
	if (unp_gcing)
		goto unlock;
	unp_gcing = 1;
	rw_exit_write(&unp_gc_lock);

	rw_enter_write(&unp_df_lock);
	/* close any fds on the deferred list */
	while ((defer = SLIST_FIRST(&unp_deferred)) != NULL) {
		SLIST_REMOVE_HEAD(&unp_deferred, ud_link);
		rw_exit_write(&unp_df_lock);
		for (i = 0; i < defer->ud_n; i++) {
			fp = defer->ud_fp[i].fp;
			if (fp == NULL)
				continue;
			if ((unp = fptounp(fp)) != NULL) {
				rw_enter_write(&unp_gc_lock);
				unp->unp_msgcount--;
				rw_exit_write(&unp_gc_lock);
			}
			mtx_enter(&unp_rights_mtx);
			unp_rights--;
			mtx_leave(&unp_rights_mtx);
			/* closef() expects a refcount of 2 */
			FREF(fp);
			(void) closef(fp, NULL);
		}
		free(defer, M_TEMP, sizeof(*defer) +
		    sizeof(struct fdpass) * defer->ud_n);
		rw_enter_write(&unp_df_lock);
	}
	rw_exit_write(&unp_df_lock);

	nunref = 0;

	rw_enter_write(&unp_gc_lock);

	/*
	 * Determine sockets which may be prospectively dead. Such
	 * sockets have `unp_msgcount' equal to `f_count': every
	 * reference to their file comes from a message in flight.
	 * If `unp_msgcount' is 0, the socket has not been passed
	 * and can't be unreferenced.
	 */
	LIST_FOREACH(unp, &unp_head, unp_link) {
		unp->unp_gcflags = 0;

		if (unp->unp_msgcount == 0)
			continue;
		if ((fp = unp->unp_file) == NULL)
			continue;
		if (fp->f_count == unp->unp_msgcount) {
			unp->unp_gcflags |= UNP_GCDEAD;
			unp->unp_gcrefs = unp->unp_msgcount;
			nunref++;
		}
	}

	/*
	 * Scan all sockets previously marked as dead. Remove
	 * the `unp_gcrefs' reference each socket holds on any
	 * dead socket in its buffer.
	 */
	LIST_FOREACH(unp, &unp_head, unp_link) {
		if ((unp->unp_gcflags & UNP_GCDEAD) == 0)
			continue;
		so = unp->unp_socket;
		solock(so);
		unp_scan(so->so_rcv.sb_mb, unp_remove_gcrefs);
		sounlock(so);
	}

	/*
	 * If a socket marked dead still has a `unp_gcrefs' count
	 * greater than 0, some of its references come from outside
	 * the dead set, so it can't be unreferenced. Mark it alive
	 * and restore the `unp_gcrefs' reference it holds on each
	 * dead socket within its buffer. Repeat until no new alive
	 * sockets are found.
	 */
	do {
		unp_defer = 0;

		LIST_FOREACH(unp, &unp_head, unp_link) {
			if ((unp->unp_gcflags & UNP_GCDEAD) == 0)
				continue;
			if (unp->unp_gcrefs == 0)
				continue;

			unp->unp_gcflags &= ~UNP_GCDEAD;

			so = unp->unp_socket;
			solock(so);
			unp_scan(so->so_rcv.sb_mb, unp_restore_gcrefs);
			sounlock(so);

			KASSERT(nunref > 0);
			nunref--;
		}
	} while (unp_defer > 0);

	/*
	 * If any unreferenced sockets remain, dispose of the files
	 * in each one's receive buffer; the deferred close of those
	 * files drops the last references and closes the sockets.
	 */
	if (nunref) {
		LIST_FOREACH(unp, &unp_head, unp_link) {
			if (unp->unp_gcflags & UNP_GCDEAD) {
				/*
				 * This socket could still be connected,
				 * in which case its `so_rcv' is still
				 * accessible to a concurrent PRU_SEND
				 * thread.
				 */
				so = unp->unp_socket;
				solock(so);
				unp_scan(so->so_rcv.sb_mb, unp_discard);
				sounlock(so);
			}
		}
	}

	unp_gcing = 0;
unlock:
	rw_exit_write(&unp_gc_lock);
}

void
unp_dispose(struct mbuf *m)
{
	if (m)
		unp_scan(m, unp_discard);
}

void
unp_scan(struct mbuf *m0, void (*op)(struct fdpass *, int))
{
	struct mbuf *m;
	struct fdpass *rp;
	struct cmsghdr *cm;
	int qfds;

	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type == MT_CONTROL &&
			    m->m_len >= sizeof(*cm)) {
				cm = mtod(m, struct cmsghdr *);
				if (cm->cmsg_level != SOL_SOCKET ||
				    cm->cmsg_type != SCM_RIGHTS)
					continue;
				qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm))
				    / sizeof(struct fdpass);
				if (qfds > 0) {
					rp = (struct fdpass *)CMSG_DATA(cm);
					op(rp, qfds);
				}
				break;		/* XXX, but saves time */
			}
		}
		m0 = m0->m_nextpkt;
	}
}

void
unp_discard(struct fdpass *rp, int nfds)
{
	struct unp_deferral *defer;

	/* copy the file pointers to a deferral structure */
	defer = malloc(sizeof(*defer) + sizeof(*rp) * nfds, M_TEMP, M_WAITOK);
	defer->ud_n = nfds;
	memcpy(&defer->ud_fp[0], rp, sizeof(*rp) * nfds);
	memset(rp, 0, sizeof(*rp) * nfds);

	rw_enter_write(&unp_df_lock);
	SLIST_INSERT_HEAD(&unp_deferred, defer, ud_link);
	rw_exit_write(&unp_df_lock);

	task_add(systqmp, &unp_gc_task);
}

void
unp_remove_gcrefs(struct fdpass *rp, int nfds)
{
	struct unpcb *unp;
	int i;

	rw_assert_wrlock(&unp_gc_lock);

	for (i = 0; i < nfds; i++) {
		if (rp[i].fp == NULL)
			continue;
		if ((unp = fptounp(rp[i].fp)) == NULL)
			continue;
		if (unp->unp_gcflags & UNP_GCDEAD) {
			KASSERT(unp->unp_gcrefs > 0);
			unp->unp_gcrefs--;
		}
	}
}

void
unp_restore_gcrefs(struct fdpass *rp, int nfds)
{
	struct unpcb *unp;
	int i;

	rw_assert_wrlock(&unp_gc_lock);

	for (i = 0; i < nfds; i++) {
		if (rp[i].fp == NULL)
			continue;
		if ((unp = fptounp(rp[i].fp)) == NULL)
			continue;
		if (unp->unp_gcflags & UNP_GCDEAD) {
			unp->unp_gcrefs++;
			unp_defer++;
		}
	}
}

int
unp_nam2sun(struct mbuf *nam, struct sockaddr_un **sun, size_t *pathlen)
{
	struct sockaddr *sa = mtod(nam, struct sockaddr *);
	size_t size, len;

	if (nam->m_len < offsetof(struct sockaddr, sa_data))
		return EINVAL;
	if (sa->sa_family != AF_UNIX)
		return EAFNOSUPPORT;
	if (sa->sa_len != nam->m_len)
		return EINVAL;
	if (sa->sa_len > sizeof(struct sockaddr_un))
		return EINVAL;
	*sun = (struct sockaddr_un *)sa;

	/* ensure that sun_path is NUL terminated and fits */
	size = (*sun)->sun_len - offsetof(struct sockaddr_un, sun_path);
	len = strnlen((*sun)->sun_path, size);
	if (len == sizeof((*sun)->sun_path))
		return EINVAL;
	if (len == size) {
		if (m_trailingspace(nam) == 0)
			return EINVAL;
		nam->m_len++;
		(*sun)->sun_len++;
		(*sun)->sun_path[len] = '\0';
	}
	if (pathlen != NULL)
		*pathlen = len;

	return 0;
}
1586