/*	$OpenBSD: uipc_usrreq.c,v 1.191 2022/10/17 14:49:01 mvs Exp $	*/
/*	$NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/filedesc.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/unpcb.h>
#include <sys/un.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/mbuf.h>
#include <sys/task.h>
#include <sys/pledge.h>
#include <sys/pool.h>
#include <sys/rwlock.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/refcnt.h>

#include "kcov.h"
#if NKCOV > 0
#include <sys/kcov.h>
#endif

/*
 * Locks used to protect global data and struct members:
 *      I       immutable after creation
 *      D       unp_df_lock
 *      G       unp_gc_lock
 *      M       unp_ino_mtx
 *      R       unp_rights_mtx
 *      a       atomic
 *      s       socket lock
 */

struct rwlock unp_lock = RWLOCK_INITIALIZER("unplock");
struct rwlock unp_df_lock = RWLOCK_INITIALIZER("unpdflk");
struct rwlock unp_gc_lock = RWLOCK_INITIALIZER("unpgclk");

struct mutex unp_rights_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
struct mutex unp_ino_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);

/*
 * Stack of sets of files that were passed over a socket but were
 * not received and need to be closed.
 */
struct	unp_deferral {
	SLIST_ENTRY(unp_deferral)	ud_link;	/* [D] */
	int				ud_n;		/* [I] */
	/* followed by ud_n struct fdpass */
	struct fdpass			ud_fp[];	/* [I] */
};

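/*
 * A deferral record is allocated with space for its flexible array
 * member in a single allocation; a sketch of what unp_discard() below
 * does:
 *
 *	defer = malloc(sizeof(*defer) + sizeof(struct fdpass) * nfds,
 *	    M_TEMP, M_WAITOK);
 *	defer->ud_n = nfds;
 *	memcpy(&defer->ud_fp[0], rp, sizeof(struct fdpass) * nfds);
 */
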
void	uipc_setaddr(const struct unpcb *, struct mbuf *);
void	unp_discard(struct fdpass *, int);
void	unp_remove_gcrefs(struct fdpass *, int);
void	unp_restore_gcrefs(struct fdpass *, int);
void	unp_scan(struct mbuf *, void (*)(struct fdpass *, int));
int	unp_nam2sun(struct mbuf *, struct sockaddr_un **, size_t *);
static inline void unp_ref(struct unpcb *);
static inline void unp_rele(struct unpcb *);
struct socket *unp_solock_peer(struct socket *);

struct pool unpcb_pool;
struct task unp_gc_task = TASK_INITIALIZER(unp_gc, NULL);

/*
 * Unix communications domain.
 *
 * TODO:
 *	RDM
 *	rethink name space problems
 *	need a proper out-of-band
 */
const struct	sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };

/* [G] list of all UNIX domain sockets, for unp_gc() */
LIST_HEAD(unp_head, unpcb)	unp_head =
	LIST_HEAD_INITIALIZER(unp_head);
/* [D] list of sets of files that were sent over sockets that are now closed */
SLIST_HEAD(,unp_deferral)	unp_deferred =
	SLIST_HEAD_INITIALIZER(unp_deferred);

ino_t	unp_ino;	/* [M] prototype for fake inode numbers */
int	unp_rights;	/* [R] file descriptors in flight */
int	unp_defer;	/* [G] number of deferred fp to close by the GC task */
int	unp_gcing;	/* [G] GC task currently running */

const struct pr_usrreqs uipc_usrreqs = {
	.pru_attach	= uipc_attach,
	.pru_detach	= uipc_detach,
	.pru_bind	= uipc_bind,
	.pru_listen	= uipc_listen,
	.pru_connect	= uipc_connect,
	.pru_accept	= uipc_accept,
	.pru_disconnect	= uipc_disconnect,
	.pru_shutdown	= uipc_shutdown,
	.pru_rcvd	= uipc_rcvd,
	.pru_send	= uipc_send,
	.pru_abort	= uipc_abort,
	.pru_sense	= uipc_sense,
	.pru_sockaddr	= uipc_sockaddr,
	.pru_peeraddr	= uipc_peeraddr,
	.pru_connect2	= uipc_connect2,
};

void
unp_init(void)
{
	pool_init(&unpcb_pool, sizeof(struct unpcb), 0,
	    IPL_SOFTNET, 0, "unpcb", NULL);
}

static inline void
unp_ref(struct unpcb *unp)
{
	refcnt_take(&unp->unp_refcnt);
}

static inline void
unp_rele(struct unpcb *unp)
{
	refcnt_rele_wake(&unp->unp_refcnt);
}

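/*
 * Lock the peer of an already locked socket and return it.  To keep a
 * global lock order, the two socket locks are always taken in ascending
 * address order.  When the peer's address is lower than that of `so',
 * `so' must be unlocked first; the peer is reference counted across the
 * re-lock so it cannot go away, and the connection is re-checked
 * afterwards because a datagram socket may have been reconnected in the
 * meantime.
 */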
struct socket *
unp_solock_peer(struct socket *so)
{
	struct unpcb *unp, *unp2;
	struct socket *so2;

	unp = so->so_pcb;

again:
	if ((unp2 = unp->unp_conn) == NULL)
		return NULL;

	so2 = unp2->unp_socket;

	if (so < so2)
		solock(so2);
	else if (so > so2) {
		unp_ref(unp2);
		sounlock(so);
		solock(so2);
		solock(so);

		/* Datagram socket could be reconnected due to re-lock. */
		if (unp->unp_conn != unp2) {
			sounlock(so2);
			unp_rele(unp2);
			goto again;
		}

		unp_rele(unp2);
	}

	return so2;
}

void
uipc_setaddr(const struct unpcb *unp, struct mbuf *nam)
{
	if (unp != NULL && unp->unp_addr != NULL) {
		nam->m_len = unp->unp_addr->m_len;
		memcpy(mtod(nam, caddr_t), mtod(unp->unp_addr, caddr_t),
		    nam->m_len);
	} else {
		nam->m_len = sizeof(sun_noname);
		memcpy(mtod(nam, struct sockaddr *), &sun_noname,
		    nam->m_len);
	}
}

/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 */
#define	PIPSIZ	8192
u_int	unpst_sendspace = PIPSIZ;
u_int	unpst_recvspace = PIPSIZ;
u_int	unpsq_sendspace = PIPSIZ;
u_int	unpsq_recvspace = PIPSIZ;
u_int	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_int	unpdg_recvspace = 16*1024;

const struct sysctl_bounded_args unpstctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpst_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpst_sendspace, 0, SB_MAX },
};
const struct sysctl_bounded_args unpsqctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpsq_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpsq_sendspace, 0, SB_MAX },
};
const struct sysctl_bounded_args unpdgctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpdg_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpdg_sendspace, 0, SB_MAX },
};

int
uipc_attach(struct socket *so, int proto, int wait)
{
	struct unpcb *unp;
	int error;

	if (so->so_pcb)
		return EISCONN;
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {

		case SOCK_STREAM:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_SEQPACKET:
			error = soreserve(so, unpsq_sendspace, unpsq_recvspace);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			return (error);
	}
	unp = pool_get(&unpcb_pool, (wait == M_WAIT ? PR_WAITOK : PR_NOWAIT) |
	    PR_ZERO);
	if (unp == NULL)
		return (ENOBUFS);
	refcnt_init(&unp->unp_refcnt);
	unp->unp_socket = so;
	so->so_pcb = unp;
	getnanotime(&unp->unp_ctime);

	/*
	 * Enforce `unp_gc_lock' -> `solock()' lock order.
	 */
	sounlock(so);
	rw_enter_write(&unp_gc_lock);
	LIST_INSERT_HEAD(&unp_head, unp, unp_link);
	rw_exit_write(&unp_gc_lock);
	solock(so);
	return (0);
}

int
uipc_detach(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == NULL)
		return (EINVAL);

	unp_detach(unp);

	return (0);
}

int
uipc_bind(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct unpcb *unp = sotounpcb(so);

	return unp_bind(unp, nam, p);
}

int
uipc_listen(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp->unp_vnode == NULL)
		return (EINVAL);
	return (0);
}

int
uipc_connect(struct socket *so, struct mbuf *nam)
{
	return unp_connect(so, nam, curproc);
}

int
uipc_accept(struct socket *so, struct mbuf *nam)
{
	struct socket *so2;
	struct unpcb *unp = sotounpcb(so);

	/*
	 * Pass back name of connected socket, if it was bound and
	 * we are still connected (our peer may have closed already!).
	 */
	so2 = unp_solock_peer(so);
	uipc_setaddr(unp->unp_conn, nam);

	if (so2 != NULL && so2 != so)
		sounlock(so2);
	return (0);
}

int
uipc_disconnect(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	unp_disconnect(unp);
	return (0);
}

int
uipc_shutdown(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	socantsendmore(so);
	unp_shutdown(unp);
	return (0);
}

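/*
 * Unix domain sockets deliver data straight into the peer's receive
 * buffer, so the local send buffer is never used for storage.  Flow
 * control is implemented by mirroring the peer's receive buffer
 * counters (`sb_cc', `sb_mbcnt') into the sender's send buffer:
 * uipc_send() raises them after appending data and uipc_rcvd() lowers
 * them once the reader has consumed data, waking up blocked writers.
 */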
void
uipc_rcvd(struct socket *so)
{
	struct socket *so2;

	switch (so->so_type) {
	case SOCK_DGRAM:
		panic("uipc 1");
		/*NOTREACHED*/

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		if ((so2 = unp_solock_peer(so)) == NULL)
			break;
		/*
		 * Adjust backpressure on sender
		 * and wakeup any waiting to write.
		 */
		so2->so_snd.sb_mbcnt = so->so_rcv.sb_mbcnt;
		so2->so_snd.sb_cc = so->so_rcv.sb_cc;
		sowwakeup(so2);
		sounlock(so2);
		break;

	default:
		panic("uipc 2");
	}
}

int
uipc_send(struct socket *so, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	int error = 0;

	if (control) {
		sounlock(so);
		error = unp_internalize(control, curproc);
		solock(so);
		if (error)
			goto out;
	}

	switch (so->so_type) {
	case SOCK_DGRAM: {
		const struct sockaddr *from;

		if (nam) {
			if (unp->unp_conn) {
				error = EISCONN;
				break;
			}
			error = unp_connect(so, nam, curproc);
			if (error)
				break;
		}

		if ((so2 = unp_solock_peer(so)) == NULL) {
			if (nam != NULL)
				error = ECONNREFUSED;
			else
				error = ENOTCONN;
			break;
		}

		if (unp->unp_addr)
			from = mtod(unp->unp_addr, struct sockaddr *);
		else
			from = &sun_noname;
		if (sbappendaddr(so2, &so2->so_rcv, from, m, control)) {
			sorwakeup(so2);
			m = NULL;
			control = NULL;
		} else
			error = ENOBUFS;

		if (so2 != so)
			sounlock(so2);

		if (nam)
			unp_disconnect(unp);
		break;
	}

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}
		if ((so2 = unp_solock_peer(so)) == NULL) {
			error = ENOTCONN;
			break;
		}

		/*
		 * Send to paired receive port, and then raise
		 * send buffer counts to maintain backpressure.
		 * Wake up readers.
		 */
		if (control) {
			if (sbappendcontrol(so2, &so2->so_rcv, m, control)) {
				control = NULL;
			} else {
				sounlock(so2);
				error = ENOBUFS;
				break;
			}
		} else if (so->so_type == SOCK_SEQPACKET)
			sbappendrecord(so2, &so2->so_rcv, m);
		else
			sbappend(so2, &so2->so_rcv, m);
		so->so_snd.sb_mbcnt = so2->so_rcv.sb_mbcnt;
		so->so_snd.sb_cc = so2->so_rcv.sb_cc;
		if (so2->so_rcv.sb_cc > 0)
			sorwakeup(so2);

		sounlock(so2);
		m = NULL;
		break;

	default:
		panic("uipc 4");
	}

	/* we need to undo unp_internalize in case of errors */
	if (control && error)
		unp_dispose(control);

out:
	m_freem(control);
	m_freem(m);

	return (error);
}

void
uipc_abort(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	unp_detach(unp);
	sofree(so, 0);
}

int
uipc_sense(struct socket *so, struct stat *sb)
{
	struct unpcb *unp = sotounpcb(so);

	sb->st_blksize = so->so_snd.sb_hiwat;
	sb->st_dev = NODEV;
	mtx_enter(&unp_ino_mtx);
	if (unp->unp_ino == 0)
		unp->unp_ino = unp_ino++;
	mtx_leave(&unp_ino_mtx);
	sb->st_atim.tv_sec =
	    sb->st_mtim.tv_sec =
	    sb->st_ctim.tv_sec = unp->unp_ctime.tv_sec;
	sb->st_atim.tv_nsec =
	    sb->st_mtim.tv_nsec =
	    sb->st_ctim.tv_nsec = unp->unp_ctime.tv_nsec;
	sb->st_ino = unp->unp_ino;

	return (0);
}

int
uipc_sockaddr(struct socket *so, struct mbuf *nam)
{
	struct unpcb *unp = sotounpcb(so);

	uipc_setaddr(unp, nam);
	return (0);
}

int
uipc_peeraddr(struct socket *so, struct mbuf *nam)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	so2 = unp_solock_peer(so);
	uipc_setaddr(unp->unp_conn, nam);
	if (so2 != NULL && so2 != so)
		sounlock(so2);
	return (0);
}

int
uipc_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so), *unp2;
	int error;

	if ((error = unp_connect2(so, so2)))
		return (error);

	unp->unp_connid.uid = curproc->p_ucred->cr_uid;
	unp->unp_connid.gid = curproc->p_ucred->cr_gid;
	unp->unp_connid.pid = curproc->p_p->ps_pid;
	unp->unp_flags |= UNP_FEIDS;
	unp2 = sotounpcb(so2);
	unp2->unp_connid.uid = curproc->p_ucred->cr_uid;
	unp2->unp_connid.gid = curproc->p_ucred->cr_gid;
	unp2->unp_connid.pid = curproc->p_p->ps_pid;
	unp2->unp_flags |= UNP_FEIDS;

	return (0);
}

int
uipc_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int *valp = &unp_defer;

	/* All sysctl names at this level are terminal. */
	switch (name[0]) {
	case SOCK_STREAM:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpstctl_vars, nitems(unpstctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case SOCK_SEQPACKET:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpsqctl_vars, nitems(unpsqctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case SOCK_DGRAM:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpdgctl_vars, nitems(unpdgctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case NET_UNIX_INFLIGHT:
		valp = &unp_rights;
		/* FALLTHROUGH */
	case NET_UNIX_DEFERRED:
		if (namelen != 1)
			return (ENOTDIR);
		return sysctl_rdint(oldp, oldlenp, newp, *valp);
	default:
		return (ENOPROTOOPT);
	}
}

void
unp_detach(struct unpcb *unp)
{
	struct socket *so = unp->unp_socket;
	struct vnode *vp = unp->unp_vnode;
	struct unpcb *unp2;

	unp->unp_vnode = NULL;

	/*
	 * Enforce `unp_gc_lock' -> `solock()' lock order.
	 * Enforce `i_lock' -> `solock()' lock order.
	 */
	sounlock(so);

	rw_enter_write(&unp_gc_lock);
	LIST_REMOVE(unp, unp_link);
	rw_exit_write(&unp_gc_lock);

	if (vp != NULL) {
		VOP_LOCK(vp, LK_EXCLUSIVE);
		vp->v_socket = NULL;

		KERNEL_LOCK();
		vput(vp);
		KERNEL_UNLOCK();
	}

	solock(so);

	if (unp->unp_conn != NULL) {
		/*
		 * Datagram socket could be connected to itself.
		 * Such socket will be disconnected here.
		 */
		unp_disconnect(unp);
	}

	while ((unp2 = SLIST_FIRST(&unp->unp_refs)) != NULL) {
		struct socket *so2 = unp2->unp_socket;

		if (so < so2)
			solock(so2);
		else {
			unp_ref(unp2);
			sounlock(so);
			solock(so2);
			solock(so);

			if (unp2->unp_conn != unp) {
				/* `unp2' was disconnected due to re-lock. */
				sounlock(so2);
				unp_rele(unp2);
				continue;
			}

			unp_rele(unp2);
		}

		unp2->unp_conn = NULL;
		SLIST_REMOVE(&unp->unp_refs, unp2, unpcb, unp_nextref);
		so2->so_error = ECONNRESET;
		so2->so_state &= ~SS_ISCONNECTED;

		sounlock(so2);
	}

	sounlock(so);
	refcnt_finalize(&unp->unp_refcnt, "unpfinal");
	solock(so);

	soisdisconnected(so);
	so->so_pcb = NULL;
	m_freem(unp->unp_addr);
	pool_put(&unpcb_pool, unp);
	if (unp_rights)
		task_add(systqmp, &unp_gc_task);
}

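/*
 * Bind a name in the file system namespace to the socket: the path is
 * created as a VSOCK vnode via namei()/VOP_CREATE() and cross-linked
 * with the socket.  If the path already exists the bind fails with
 * EADDRINUSE; an existing file is never reused.  A userland sketch of
 * what reaches this path (the path name is illustrative):
 *
 *	struct sockaddr_un sun;
 *	int s = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *	memset(&sun, 0, sizeof(sun));
 *	sun.sun_family = AF_UNIX;
 *	strlcpy(sun.sun_path, "/tmp/example.sock", sizeof(sun.sun_path));
 *	bind(s, (struct sockaddr *)&sun, sizeof(sun));
 */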
int
unp_bind(struct unpcb *unp, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct mbuf *nam2;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	size_t pathlen;

	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EINVAL);
	if (unp->unp_vnode != NULL)
		return (EINVAL);
	if ((error = unp_nam2sun(nam, &soun, &pathlen)))
		return (error);

	unp->unp_flags |= UNP_BINDING;

	/*
	 * Enforce `i_lock' -> `unplock' because fifo subsystem
	 * requires it. The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(unp->unp_socket);

	nam2 = m_getclr(M_WAITOK, MT_SONAME);
	nam2->m_len = sizeof(struct sockaddr_un);
	memcpy(mtod(nam2, struct sockaddr_un *), soun,
	    offsetof(struct sockaddr_un, sun_path) + pathlen);
	/* No need to NUL terminate: m_getclr() returns zero'd mbufs. */

	soun = mtod(nam2, struct sockaddr_un *);

	/* Fixup sun_len to keep it in sync with m_len. */
	soun->sun_len = nam2->m_len;

	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;
	nd.ni_unveil = UNVEIL_CREATE;

	KERNEL_LOCK();
/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error != 0) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	vp = nd.ni_vp;
	if (vp != NULL) {
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		m_freem(nam2);
		error = EADDRINUSE;
		solock(unp->unp_socket);
		goto out;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	vput(nd.ni_dvp);
	if (error) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	solock(unp->unp_socket);
	unp->unp_addr = nam2;
	vp = nd.ni_vp;
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_connid.uid = p->p_ucred->cr_uid;
	unp->unp_connid.gid = p->p_ucred->cr_gid;
	unp->unp_connid.pid = p->p_p->ps_pid;
	unp->unp_flags |= UNP_FEIDSBIND;
	VOP_UNLOCK(vp);
out:
	KERNEL_UNLOCK();
	unp->unp_flags &= ~UNP_BINDING;

	return (error);
}

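/*
 * Connect to a bound unix domain socket.  The path is looked up with
 * namei(); the resulting vnode must be of type VSOCK, writable by the
 * caller, and have a socket attached.  For connection oriented types
 * (PR_CONNREQUIRED) a fresh server socket is spawned with sonewconn()
 * and the connection is established against it, so the listening
 * socket itself never becomes a peer.
 */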
int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	struct nameidata nd;
	int error;

	unp = sotounpcb(so);
	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EISCONN);
	if ((error = unp_nam2sun(nam, &soun, NULL)))
		return (error);

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;
	nd.ni_unveil = UNVEIL_WRITE;

	unp->unp_flags |= UNP_CONNECTING;

	/*
	 * Enforce `i_lock' -> `unplock' because fifo subsystem
	 * requires it. The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(so);

	KERNEL_LOCK();
	error = namei(&nd);
	if (error != 0)
		goto unlock;
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto put;
	}
	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
		goto put;
	so2 = vp->v_socket;
	if (so2 == NULL) {
		error = ECONNREFUSED;
		goto put;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto put;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		solock(so2);

		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0, M_WAIT)) == NULL) {
			error = ECONNREFUSED;
		}

		sounlock(so2);

		if (error != 0)
			goto put;

		/*
		 * Since `so2' is protected by vnode(9) lock, `so3'
		 * can't be PRU_ABORT'ed here.
		 */
		solock_pair(so, so3);

		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);

		/*
		 * `unp_addr', `unp_connid' and 'UNP_FEIDSBIND' flag
		 * are immutable since we set them in unp_bind().
		 */
		if (unp2->unp_addr)
			unp3->unp_addr =
			    m_copym(unp2->unp_addr, 0, M_COPYALL, M_NOWAIT);
		unp3->unp_connid.uid = p->p_ucred->cr_uid;
		unp3->unp_connid.gid = p->p_ucred->cr_gid;
		unp3->unp_connid.pid = p->p_p->ps_pid;
		unp3->unp_flags |= UNP_FEIDS;

		if (unp2->unp_flags & UNP_FEIDSBIND) {
			unp->unp_connid = unp2->unp_connid;
			unp->unp_flags |= UNP_FEIDS;
		}

		so2 = so3;
	} else {
		if (so2 != so)
			solock_pair(so, so2);
		else
			solock(so);
	}

	error = unp_connect2(so, so2);

	sounlock(so);

	/*
	 * `so2' can't be PRU_ABORT'ed concurrently.
	 */
	if (so2 != so)
		sounlock(so2);
put:
	vput(vp);
unlock:
	KERNEL_UNLOCK();
	solock(so);
	unp->unp_flags &= ~UNP_CONNECTING;

	/*
	 * The peer socket could be closed by concurrent thread
	 * when `so' and `vp' are unlocked.
	 */
	if (error == 0 && unp->unp_conn == NULL)
		error = ECONNREFUSED;

	return (error);
}

int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;

	soassertlocked(so);
	soassertlocked(so2);

	if (so2->so_type != so->so_type)
		return (EPROTOTYPE);
	unp2 = sotounpcb(so2);
	unp->unp_conn = unp2;
	switch (so->so_type) {

	case SOCK_DGRAM:
		SLIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_nextref);
		soisconnected(so);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp2->unp_conn = unp;
		soisconnected(so);
		soisconnected(so2);
		break;

	default:
		panic("unp_connect2");
	}
	return (0);
}

void
unp_disconnect(struct unpcb *unp)
{
	struct socket *so2;
	struct unpcb *unp2;

	if ((so2 = unp_solock_peer(unp->unp_socket)) == NULL)
		return;

	unp2 = unp->unp_conn;
	unp->unp_conn = NULL;

	switch (unp->unp_socket->so_type) {

	case SOCK_DGRAM:
		SLIST_REMOVE(&unp2->unp_refs, unp, unpcb, unp_nextref);
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp->unp_socket->so_snd.sb_mbcnt = 0;
		unp->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp->unp_socket);
		unp2->unp_conn = NULL;
		unp2->unp_socket->so_snd.sb_mbcnt = 0;
		unp2->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp2->unp_socket);
		break;
	}

	if (so2 != unp->unp_socket)
		sounlock(so2);
}

void
unp_shutdown(struct unpcb *unp)
{
	struct socket *so2;

	switch (unp->unp_socket->so_type) {
	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		if ((so2 = unp_solock_peer(unp->unp_socket)) == NULL)
			break;

		socantrcvmore(so2);
		sounlock(so2);

		break;
	default:
		break;
	}
}

static struct unpcb *
fptounp(struct file *fp)
{
	struct socket *so;

	if (fp->f_type != DTYPE_SOCKET)
		return (NULL);
	if ((so = fp->f_data) == NULL)
		return (NULL);
	if (so->so_proto->pr_domain != &unixdomain)
		return (NULL);
	return (sotounpcb(so));
}

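/*
 * Turn the in-kernel struct file references of an SCM_RIGHTS control
 * message into file descriptors in the receiving process.  A userland
 * sketch of the receiving side (error handling omitted, names are
 * illustrative):
 *
 *	union {
 *		struct cmsghdr hdr;
 *		unsigned char buf[CMSG_SPACE(sizeof(int))];
 *	} cmsgbuf;
 *	struct msghdr msg;
 *	struct cmsghdr *cmsg;
 *	int fd;
 *
 *	memset(&msg, 0, sizeof(msg));
 *	msg.msg_control = &cmsgbuf.buf;
 *	msg.msg_controllen = sizeof(cmsgbuf.buf);
 *	recvmsg(s, &msg, 0);
 *	cmsg = CMSG_FIRSTHDR(&msg);
 *	if (cmsg != NULL && cmsg->cmsg_level == SOL_SOCKET &&
 *	    cmsg->cmsg_type == SCM_RIGHTS)
 *		memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
 */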
int
unp_externalize(struct mbuf *rights, socklen_t controllen, int flags)
{
	struct proc *p = curproc;		/* XXX */
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	struct filedesc *fdp = p->p_fd;
	int i, *fds = NULL;
	struct fdpass *rp;
	struct file *fp;
	int nfds, error = 0;

	/*
	 * This code only works because SCM_RIGHTS is the only supported
	 * control message type on unix sockets. Enforce this here.
	 */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET)
		return EINVAL;

	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
	    sizeof(struct fdpass);
	if (controllen < CMSG_ALIGN(sizeof(struct cmsghdr)))
		controllen = 0;
	else
		controllen -= CMSG_ALIGN(sizeof(struct cmsghdr));
	if (nfds > controllen / sizeof(int)) {
		error = EMSGSIZE;
		goto out;
	}

	/* Make sure the recipient is able to see the descriptors. */
	rp = (struct fdpass *)CMSG_DATA(cm);

	/* fdp->fd_rdir requires KERNEL_LOCK() */
	KERNEL_LOCK();

	for (i = 0; i < nfds; i++) {
		fp = rp->fp;
		rp++;
		error = pledge_recvfd(p, fp);
		if (error)
			break;

		/*
		 * Block devices may not be passed.  If passing a
		 * directory, make sure that it is underneath the root.
		 */
		if (fdp->fd_rdir != NULL && fp->f_type == DTYPE_VNODE) {
			struct vnode *vp = (struct vnode *)fp->f_data;

			if (vp->v_type == VBLK ||
			    (vp->v_type == VDIR &&
			    !vn_isunder(vp, fdp->fd_rdir, p))) {
				error = EPERM;
				break;
			}
		}
	}

	KERNEL_UNLOCK();

	if (error)
		goto out;

	fds = mallocarray(nfds, sizeof(int), M_TEMP, M_WAITOK);

	fdplock(fdp);
restart:
	/*
	 * First loop -- allocate file descriptor table slots for the
	 * new descriptors.
	 */
	rp = ((struct fdpass *)CMSG_DATA(cm));
	for (i = 0; i < nfds; i++) {
		if ((error = fdalloc(p, 0, &fds[i])) != 0) {
			/*
			 * Back out what we've done so far.
			 */
			for (--i; i >= 0; i--)
				fdremove(fdp, fds[i]);

			if (error == ENOSPC) {
				fdexpand(p);
				goto restart;
			}

			fdpunlock(fdp);

			/*
			 * This is the error that has historically
			 * been returned, and some callers may
			 * expect it.
			 */

			error = EMSGSIZE;
			goto out;
		}

		/*
		 * Make the slot reference the descriptor so that
		 * fdalloc() works properly.  We finalize it all
		 * in the loop below.
		 */
		mtx_enter(&fdp->fd_fplock);
		KASSERT(fdp->fd_ofiles[fds[i]] == NULL);
		fdp->fd_ofiles[fds[i]] = rp->fp;
		mtx_leave(&fdp->fd_fplock);

		fdp->fd_ofileflags[fds[i]] = (rp->flags & UF_PLEDGED);
		if (flags & MSG_CMSG_CLOEXEC)
			fdp->fd_ofileflags[fds[i]] |= UF_EXCLOSE;

		rp++;
	}

	/*
	 * Keep `fdp' locked to prevent concurrent close() of just
	 * inserted descriptors. Such descriptors could have the only
	 * `f_count' reference which is now shared between control
	 * message and `fdp'.
	 */

	/*
	 * Now that adding them has succeeded, update all of the
	 * descriptor passing state.
	 */
	rp = (struct fdpass *)CMSG_DATA(cm);

	for (i = 0; i < nfds; i++) {
		struct unpcb *unp;

		fp = rp->fp;
		rp++;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount--;
			rw_exit_write(&unp_gc_lock);
		}
	}
	fdpunlock(fdp);

	mtx_enter(&unp_rights_mtx);
	unp_rights -= nfds;
	mtx_leave(&unp_rights_mtx);

	/*
	 * Copy temporary array to message and adjust length, in case of
	 * transition from large struct file pointers to ints.
	 */
	memcpy(CMSG_DATA(cm), fds, nfds * sizeof(int));
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
	rights->m_len = CMSG_LEN(nfds * sizeof(int));
 out:
	if (fds != NULL)
		free(fds, M_TEMP, nfds * sizeof(int));

	if (error) {
		if (nfds > 0) {
			/*
			 * No lock required. We are the only `cm' holder.
			 */
			rp = ((struct fdpass *)CMSG_DATA(cm));
			unp_discard(rp, nfds);
		}
	}

	return (error);
}

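/*
 * Convert the file descriptors of an incoming SCM_RIGHTS control
 * message into in-kernel struct file references (struct fdpass) and
 * account them as in-flight.  The sending side in userland looks
 * roughly like this (error handling omitted, names are illustrative):
 *
 *	union {
 *		struct cmsghdr hdr;
 *		unsigned char buf[CMSG_SPACE(sizeof(int))];
 *	} cmsgbuf;
 *	struct msghdr msg;
 *	struct cmsghdr *cmsg;
 *
 *	memset(&msg, 0, sizeof(msg));
 *	msg.msg_control = &cmsgbuf.buf;
 *	msg.msg_controllen = sizeof(cmsgbuf.buf);
 *	cmsg = CMSG_FIRSTHDR(&msg);
 *	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
 *	cmsg->cmsg_level = SOL_SOCKET;
 *	cmsg->cmsg_type = SCM_RIGHTS;
 *	memcpy(CMSG_DATA(cmsg), &fd, sizeof(fd));
 *	sendmsg(s, &msg, 0);
 */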
int
unp_internalize(struct mbuf *control, struct proc *p)
{
	struct filedesc *fdp = p->p_fd;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct fdpass *rp;
	struct file *fp;
	struct unpcb *unp;
	int i, error;
	int nfds, *ip, fd, neededspace;

	/*
	 * Check for two potential msg_controllen values because
	 * IETF stuck their nose in a place it does not belong.
	 */
	if (control->m_len < CMSG_LEN(0) || cm->cmsg_len < CMSG_LEN(0))
		return (EINVAL);
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    !(cm->cmsg_len == control->m_len ||
	    control->m_len == CMSG_ALIGN(cm->cmsg_len)))
		return (EINVAL);
	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);

	mtx_enter(&unp_rights_mtx);
	if (unp_rights + nfds > maxfiles / 10) {
		mtx_leave(&unp_rights_mtx);
		return (EMFILE);
	}
	unp_rights += nfds;
	mtx_leave(&unp_rights_mtx);

	/* Make sure we have room for the struct file pointers */
morespace:
	neededspace = CMSG_SPACE(nfds * sizeof(struct fdpass)) -
	    control->m_len;
	if (neededspace > m_trailingspace(control)) {
		char *tmp;
		/* if we already have a cluster, the message is just too big */
		if (control->m_flags & M_EXT) {
			error = E2BIG;
			goto nospace;
		}

		/* copy cmsg data temporarily out of the mbuf */
		tmp = malloc(control->m_len, M_TEMP, M_WAITOK);
		memcpy(tmp, mtod(control, caddr_t), control->m_len);

		/* allocate a cluster and try again */
		MCLGET(control, M_WAIT);
		if ((control->m_flags & M_EXT) == 0) {
			free(tmp, M_TEMP, control->m_len);
			error = ENOBUFS;       /* allocation failed */
			goto nospace;
		}

		/* copy the data back into the cluster */
		cm = mtod(control, struct cmsghdr *);
		memcpy(cm, tmp, control->m_len);
		free(tmp, M_TEMP, control->m_len);
		goto morespace;
	}

	/* adjust message & mbuf to note amount of space actually used. */
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct fdpass));
	control->m_len = CMSG_SPACE(nfds * sizeof(struct fdpass));

	ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
	rp = ((struct fdpass *)CMSG_DATA(cm)) + nfds - 1;
	fdplock(fdp);
	for (i = 0; i < nfds; i++) {
		memcpy(&fd, ip, sizeof fd);
		ip--;
		if ((fp = fd_getfile(fdp, fd)) == NULL) {
			error = EBADF;
			goto fail;
		}
		if (fp->f_count >= FDUP_MAX_COUNT) {
			error = EDEADLK;
			goto fail;
		}
		error = pledge_sendfd(p, fp);
		if (error)
			goto fail;

		/* kqueue descriptors cannot be copied */
		if (fp->f_type == DTYPE_KQUEUE) {
			error = EINVAL;
			goto fail;
		}
#if NKCOV > 0
		/* kcov descriptors cannot be copied */
		if (fp->f_type == DTYPE_VNODE && kcov_vnode(fp->f_data)) {
			error = EINVAL;
			goto fail;
		}
#endif
		rp->fp = fp;
		rp->flags = fdp->fd_ofileflags[fd] & UF_PLEDGED;
		rp--;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount++;
			unp->unp_file = fp;
			rw_exit_write(&unp_gc_lock);
		}
	}
	fdpunlock(fdp);
	return (0);
fail:
	fdpunlock(fdp);
	if (fp != NULL)
		FRELE(fp, p);
	/* Back out what we just did. */
	for ( ; i > 0; i--) {
		rp++;
		fp = rp->fp;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount--;
			rw_exit_write(&unp_gc_lock);
		}
		FRELE(fp, p);
	}

nospace:
	mtx_enter(&unp_rights_mtx);
	unp_rights -= nfds;
	mtx_leave(&unp_rights_mtx);

	return (error);
}

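/*
 * Garbage collect sockets that are only referenced by in-flight
 * SCM_RIGHTS messages.  Sockets passed over sockets can form cycles
 * that plain reference counting never frees: a socket whose only
 * `f_count' references come from messages queued on sockets in the
 * same cycle is unreachable from userland.  The task below first
 * closes explicitly deferred files, then runs a mark-and-sweep pass:
 * candidates with f_count == unp_msgcount are marked dead, the
 * references held from socket buffers are subtracted, sockets that
 * still have external references are revived transitively, and
 * whatever remains dead has its queued file references discarded.
 */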
void
unp_gc(void *arg __unused)
{
	struct unp_deferral *defer;
	struct file *fp;
	struct socket *so;
	struct unpcb *unp;
	int nunref, i;

	rw_enter_write(&unp_gc_lock);
	if (unp_gcing)
		goto unlock;
	unp_gcing = 1;
	rw_exit_write(&unp_gc_lock);

	rw_enter_write(&unp_df_lock);
	/* close any fds on the deferred list */
	while ((defer = SLIST_FIRST(&unp_deferred)) != NULL) {
		SLIST_REMOVE_HEAD(&unp_deferred, ud_link);
		rw_exit_write(&unp_df_lock);
		for (i = 0; i < defer->ud_n; i++) {
			fp = defer->ud_fp[i].fp;
			if (fp == NULL)
				continue;
			if ((unp = fptounp(fp)) != NULL) {
				rw_enter_write(&unp_gc_lock);
				unp->unp_msgcount--;
				rw_exit_write(&unp_gc_lock);
			}
			mtx_enter(&unp_rights_mtx);
			unp_rights--;
			mtx_leave(&unp_rights_mtx);
			/* closef() expects a refcount of 2 */
			FREF(fp);
			(void) closef(fp, NULL);
		}
		free(defer, M_TEMP, sizeof(*defer) +
		    sizeof(struct fdpass) * defer->ud_n);
		rw_enter_write(&unp_df_lock);
	}
	rw_exit_write(&unp_df_lock);

	nunref = 0;

	rw_enter_write(&unp_gc_lock);

	/*
	 * Determine sockets which may be prospectively dead. Such
	 * sockets have their `unp_msgcount' equal to the `f_count'.
	 * If `unp_msgcount' is 0, the socket has not been passed
	 * and can't be unreferenced.
	 */
	LIST_FOREACH(unp, &unp_head, unp_link) {
		unp->unp_gcflags = 0;

		if (unp->unp_msgcount == 0)
			continue;
		if ((fp = unp->unp_file) == NULL)
			continue;
		if (fp->f_count == unp->unp_msgcount) {
			unp->unp_gcflags |= UNP_GCDEAD;
			unp->unp_gcrefs = unp->unp_msgcount;
			nunref++;
		}
	}

	/*
	 * Scan all sockets previously marked as dead. Remove
	 * the `unp_gcrefs' reference each socket holds on any
	 * dead socket in its buffer.
	 */
	LIST_FOREACH(unp, &unp_head, unp_link) {
		if ((unp->unp_gcflags & UNP_GCDEAD) == 0)
			continue;
		so = unp->unp_socket;
		solock(so);
		unp_scan(so->so_rcv.sb_mb, unp_remove_gcrefs);
		sounlock(so);
	}

	/*
	 * If the dead socket has `unp_gcrefs' reference counter
	 * greater than 0, it can't be unreferenced. Mark it as
	 * alive and increment the `unp_gcrefs' reference for each
	 * dead socket within its buffer. Repeat this until we
	 * have no new alive sockets found.
	 */
	do {
		unp_defer = 0;

		LIST_FOREACH(unp, &unp_head, unp_link) {
			if ((unp->unp_gcflags & UNP_GCDEAD) == 0)
				continue;
			if (unp->unp_gcrefs == 0)
				continue;

			unp->unp_gcflags &= ~UNP_GCDEAD;

			so = unp->unp_socket;
			solock(so);
			unp_scan(so->so_rcv.sb_mb, unp_restore_gcrefs);
			sounlock(so);

			KASSERT(nunref > 0);
			nunref--;
		}
	} while (unp_defer > 0);

	/*
	 * If there are any unreferenced sockets, dispose of the files
	 * in each one's receive buffer and then close it.
	 */
	if (nunref) {
		LIST_FOREACH(unp, &unp_head, unp_link) {
			if (unp->unp_gcflags & UNP_GCDEAD) {
				/*
				 * This socket could still be connected
				 * and if so, its `so_rcv' is still
				 * accessible by a concurrent PRU_SEND
				 * thread.
				 */
				so = unp->unp_socket;
				solock(so);
				unp_scan(so->so_rcv.sb_mb, unp_discard);
				sounlock(so);
			}
		}
	}

	unp_gcing = 0;
unlock:
	rw_exit_write(&unp_gc_lock);
}

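/*
 * Release the file references of any SCM_RIGHTS messages in the mbuf
 * chain `m' that will never reach a receiver, e.g. when a send fails
 * after unp_internalize() or when a socket with queued rights is torn
 * down.
 */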
void
unp_dispose(struct mbuf *m)
{
	if (m)
		unp_scan(m, unp_discard);
}

void
unp_scan(struct mbuf *m0, void (*op)(struct fdpass *, int))
{
	struct mbuf *m;
	struct fdpass *rp;
	struct cmsghdr *cm;
	int qfds;

	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type == MT_CONTROL &&
			    m->m_len >= sizeof(*cm)) {
				cm = mtod(m, struct cmsghdr *);
				if (cm->cmsg_level != SOL_SOCKET ||
				    cm->cmsg_type != SCM_RIGHTS)
					continue;
				qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm))
				    / sizeof(struct fdpass);
				if (qfds > 0) {
					rp = (struct fdpass *)CMSG_DATA(cm);
					op(rp, qfds);
				}
				break;		/* XXX, but saves time */
			}
		}
		m0 = m0->m_nextpkt;
	}
}

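/*
 * Discarded file references are not closed in place: closing a socket
 * from this context could recurse into the unix domain code while its
 * locks are held.  The fdpass records are instead moved onto the
 * `unp_deferred' list, and the actual close is performed by the GC
 * task.
 */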
void
unp_discard(struct fdpass *rp, int nfds)
{
	struct unp_deferral *defer;

	/* copy the file pointers to a deferral structure */
	defer = malloc(sizeof(*defer) + sizeof(*rp) * nfds, M_TEMP, M_WAITOK);
	defer->ud_n = nfds;
	memcpy(&defer->ud_fp[0], rp, sizeof(*rp) * nfds);
	memset(rp, 0, sizeof(*rp) * nfds);

	rw_enter_write(&unp_df_lock);
	SLIST_INSERT_HEAD(&unp_deferred, defer, ud_link);
	rw_exit_write(&unp_df_lock);

	task_add(systqmp, &unp_gc_task);
}

void
unp_remove_gcrefs(struct fdpass *rp, int nfds)
{
	struct unpcb *unp;
	int i;

	rw_assert_wrlock(&unp_gc_lock);

	for (i = 0; i < nfds; i++) {
		if (rp[i].fp == NULL)
			continue;
		if ((unp = fptounp(rp[i].fp)) == NULL)
			continue;
		if (unp->unp_gcflags & UNP_GCDEAD) {
			KASSERT(unp->unp_gcrefs > 0);
			unp->unp_gcrefs--;
		}
	}
}

void
unp_restore_gcrefs(struct fdpass *rp, int nfds)
{
	struct unpcb *unp;
	int i;

	rw_assert_wrlock(&unp_gc_lock);

	for (i = 0; i < nfds; i++) {
		if (rp[i].fp == NULL)
			continue;
		if ((unp = fptounp(rp[i].fp)) == NULL)
			continue;
		if (unp->unp_gcflags & UNP_GCDEAD) {
			unp->unp_gcrefs++;
			unp_defer++;
		}
	}
}

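/*
 * Validate an AF_UNIX name mbuf and return it as a sockaddr_un.  The
 * address must be exactly as long as its sa_len claims, fit within a
 * struct sockaddr_un, and carry a NUL terminated sun_path; if the path
 * fills the supplied length exactly, a terminating NUL is appended in
 * place.
 */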
int
unp_nam2sun(struct mbuf *nam, struct sockaddr_un **sun, size_t *pathlen)
{
	struct sockaddr *sa = mtod(nam, struct sockaddr *);
	size_t size, len;

	if (nam->m_len < offsetof(struct sockaddr, sa_data))
		return EINVAL;
	if (sa->sa_family != AF_UNIX)
		return EAFNOSUPPORT;
	if (sa->sa_len != nam->m_len)
		return EINVAL;
	if (sa->sa_len > sizeof(struct sockaddr_un))
		return EINVAL;
	*sun = (struct sockaddr_un *)sa;

	/* ensure that sun_path is NUL terminated and fits */
	size = (*sun)->sun_len - offsetof(struct sockaddr_un, sun_path);
	len = strnlen((*sun)->sun_path, size);
	if (len == sizeof((*sun)->sun_path))
		return EINVAL;
	if (len == size) {
		if (m_trailingspace(nam) == 0)
			return EINVAL;
		nam->m_len++;
		(*sun)->sun_len++;
		(*sun)->sun_path[len] = '\0';
	}
	if (pathlen != NULL)
		*pathlen = len;

	return 0;
}